From 4a16efa3e43e35f0cc9efe3a67f620f0017c3d36 Mon Sep 17 00:00:00 2001
From: Dimitry Andric
Date: Mon, 8 Apr 2013 18:41:23 +0000
Subject: Vendor import of llvm trunk r178860:
 http://llvm.org/svn/llvm-project/llvm/trunk@178860

---
 lib/Analysis/AliasAnalysis.cpp | 70 +- lib/Analysis/AliasAnalysisCounter.cpp | 2 +- lib/Analysis/AliasAnalysisEvaluator.cpp | 86 +- lib/Analysis/AliasDebugger.cpp | 10 +- lib/Analysis/AliasSetTracker.cpp | 12 +- lib/Analysis/Analysis.cpp | 4 +- lib/Analysis/BasicAliasAnalysis.cpp | 95 +- lib/Analysis/BlockFrequencyInfo.cpp | 6 +- lib/Analysis/BranchProbabilityInfo.cpp | 12 +- lib/Analysis/CFGPrinter.cpp | 1 - lib/Analysis/CMakeLists.txt | 4 +- lib/Analysis/CaptureTracking.cpp | 5 + lib/Analysis/CodeMetrics.cpp | 144 +- lib/Analysis/ConstantFolding.cpp | 256 +- lib/Analysis/CostModel.cpp | 122 +- lib/Analysis/DbgInfoPrinter.cpp | 224 - lib/Analysis/DependenceAnalysis.cpp | 302 +- lib/Analysis/DominanceFrontier.cpp | 2 +- lib/Analysis/IPA/CMakeLists.txt | 2 + lib/Analysis/IPA/CallGraph.cpp | 6 +- lib/Analysis/IPA/CallGraphSCCPass.cpp | 13 +- lib/Analysis/IPA/CallPrinter.cpp | 87 + lib/Analysis/IPA/FindUsedTypes.cpp | 6 +- lib/Analysis/IPA/GlobalsModRef.cpp | 16 +- lib/Analysis/IPA/IPA.cpp | 2 + lib/Analysis/IPA/InlineCost.cpp | 1239 ++++ lib/Analysis/IVUsers.cpp | 12 +- lib/Analysis/InlineCost.cpp | 1067 ---- lib/Analysis/InstCount.cpp | 10 +- lib/Analysis/InstructionSimplify.cpp | 506 +- lib/Analysis/Interval.cpp | 2 +- lib/Analysis/LazyValueInfo.cpp | 19 +- lib/Analysis/LibCallAliasAnalysis.cpp | 4 +- lib/Analysis/LibCallSemantics.cpp | 2 +- lib/Analysis/Lint.cpp | 102 +- lib/Analysis/Loads.cpp | 57 +- lib/Analysis/LoopInfo.cpp | 74 +- lib/Analysis/MemDepPrinter.cpp | 8 +- lib/Analysis/MemoryBuiltins.cpp | 131 +- lib/Analysis/MemoryDependenceAnalysis.cpp | 324 +- lib/Analysis/ModuleDebugInfoPrinter.cpp | 4 +- lib/Analysis/NoAliasAnalysis.cpp | 4 +- lib/Analysis/PHITransAddr.cpp | 6 +- lib/Analysis/PathNumbering.cpp | 15 +- lib/Analysis/PathProfileInfo.cpp | 7 +- lib/Analysis/PathProfileVerifier.cpp | 13 +- lib/Analysis/PostDominators.cpp | 8 +- lib/Analysis/ProfileDataLoader.cpp | 6 +- lib/Analysis/ProfileDataLoaderPass.cpp | 22 +- lib/Analysis/ProfileEstimatorPass.cpp | 6 +- lib/Analysis/ProfileInfo.cpp | 16 +- lib/Analysis/ProfileInfoLoader.cpp | 4 +- lib/Analysis/ProfileInfoLoaderPass.cpp | 16 +- lib/Analysis/ProfileVerifierPass.cpp | 15 +- lib/Analysis/PtrUseVisitor.cpp | 36 + lib/Analysis/RegionInfo.cpp | 7 +- lib/Analysis/RegionPrinter.cpp | 12 +- lib/Analysis/ScalarEvolution.cpp | 61 +- lib/Analysis/ScalarEvolutionAliasAnalysis.cpp | 2 +- lib/Analysis/ScalarEvolutionExpander.cpp | 23 +- lib/Analysis/SparsePropagation.cpp | 6 +- lib/Analysis/TargetTransformInfo.cpp | 558 ++ lib/Analysis/Trace.cpp | 2 +- lib/Analysis/TypeBasedAliasAnalysis.cpp | 10 +- lib/Analysis/ValueTracking.cpp | 390 +- lib/Archive/Archive.cpp | 5 +- lib/Archive/ArchiveInternals.h | 3 +- lib/Archive/ArchiveReader.cpp | 27 +- lib/Archive/ArchiveWriter.cpp | 5 +- lib/AsmParser/LLLexer.cpp | 193 +- lib/AsmParser/LLLexer.h | 3 +- lib/AsmParser/LLParser.cpp | 688 ++- lib/AsmParser/LLParser.h | 71 +- lib/AsmParser/LLToken.h | 54 +- lib/AsmParser/Parser.cpp | 4 +- lib/Bitcode/Reader/BitReader.cpp | 14 +- lib/Bitcode/Reader/BitcodeReader.cpp | 686 ++- lib/Bitcode/Reader/BitcodeReader.h | 70 +- lib/Bitcode/Reader/BitstreamReader.cpp | 371 ++ lib/Bitcode/Reader/CMakeLists.txt | 1 + lib/Bitcode/Writer/BitWriter.cpp | 9 +- lib/Bitcode/Writer/BitcodeWriter.cpp | 210 +-
lib/Bitcode/Writer/BitcodeWriterPass.cpp | 4 +- lib/Bitcode/Writer/ValueEnumerator.cpp | 64 +- lib/Bitcode/Writer/ValueEnumerator.h | 52 +- lib/CMakeLists.txt | 4 +- lib/CodeGen/AggressiveAntiDepBreaker.cpp | 27 +- lib/CodeGen/AggressiveAntiDepBreaker.h | 6 +- lib/CodeGen/AllocationOrder.cpp | 78 +- lib/CodeGen/AllocationOrder.h | 65 +- lib/CodeGen/Analysis.cpp | 53 +- lib/CodeGen/AsmPrinter/ARMException.cpp | 88 +- lib/CodeGen/AsmPrinter/AsmPrinter.cpp | 200 +- lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp | 56 +- lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp | 16 +- lib/CodeGen/AsmPrinter/CMakeLists.txt | 1 + lib/CodeGen/AsmPrinter/DIE.cpp | 32 +- lib/CodeGen/AsmPrinter/DIE.h | 26 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp | 18 +- lib/CodeGen/AsmPrinter/DwarfAccelTable.h | 32 +- lib/CodeGen/AsmPrinter/DwarfCFIException.cpp | 25 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp | 470 +- lib/CodeGen/AsmPrinter/DwarfCompileUnit.h | 74 +- lib/CodeGen/AsmPrinter/DwarfDebug.cpp | 1259 ++-- lib/CodeGen/AsmPrinter/DwarfDebug.h | 502 +- lib/CodeGen/AsmPrinter/DwarfException.cpp | 41 +- lib/CodeGen/AsmPrinter/DwarfException.h | 3 + lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp | 120 + lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp | 16 +- lib/CodeGen/AsmPrinter/Win64Exception.cpp | 19 +- lib/CodeGen/BasicTargetTransformInfo.cpp | 466 ++ lib/CodeGen/BranchFolding.cpp | 23 +- lib/CodeGen/CMakeLists.txt | 22 +- lib/CodeGen/CallingConvLower.cpp | 18 +- lib/CodeGen/CodeGen.cpp | 2 +- lib/CodeGen/CodePlacementOpt.cpp | 422 -- lib/CodeGen/CriticalAntiDepBreaker.cpp | 58 +- lib/CodeGen/CriticalAntiDepBreaker.h | 5 +- lib/CodeGen/DFAPacketizer.cpp | 4 +- lib/CodeGen/DeadMachineInstructionElim.cpp | 13 +- lib/CodeGen/DwarfEHPrepare.cpp | 14 +- lib/CodeGen/EarlyIfConversion.cpp | 10 +- lib/CodeGen/ErlangGC.cpp | 81 + lib/CodeGen/ExecutionDepsFix.cpp | 8 +- lib/CodeGen/ExpandISelPseudos.cpp | 4 +- lib/CodeGen/ExpandPostRAPseudos.cpp | 45 +- lib/CodeGen/GCMetadata.cpp | 45 +- lib/CodeGen/GCStrategy.cpp | 14 +- lib/CodeGen/IfConversion.cpp | 28 +- lib/CodeGen/InlineSpiller.cpp | 8 +- lib/CodeGen/InterferenceCache.cpp | 4 +- lib/CodeGen/InterferenceCache.h | 2 +- lib/CodeGen/IntrinsicLowering.cpp | 12 +- lib/CodeGen/LLVMBuild.txt | 2 +- lib/CodeGen/LLVMTargetMachine.cpp | 37 +- lib/CodeGen/LexicalScopes.cpp | 18 +- lib/CodeGen/LiveDebugVariables.cpp | 77 +- lib/CodeGen/LiveInterval.cpp | 372 +- lib/CodeGen/LiveIntervalAnalysis.cpp | 513 +- lib/CodeGen/LiveIntervalUnion.cpp | 31 +- lib/CodeGen/LiveIntervalUnion.h | 210 - lib/CodeGen/LiveRangeCalc.cpp | 94 +- lib/CodeGen/LiveRangeCalc.h | 29 +- lib/CodeGen/LiveRangeEdit.cpp | 8 +- lib/CodeGen/LiveRegMatrix.cpp | 10 +- lib/CodeGen/LiveRegMatrix.h | 148 - lib/CodeGen/LiveStackAnalysis.cpp | 4 +- lib/CodeGen/LiveVariables.cpp | 33 +- lib/CodeGen/LocalStackSlotAllocation.cpp | 20 +- lib/CodeGen/MachineBasicBlock.cpp | 212 +- lib/CodeGen/MachineBlockFrequencyInfo.cpp | 6 +- lib/CodeGen/MachineBlockPlacement.cpp | 18 +- lib/CodeGen/MachineBranchProbabilityInfo.cpp | 2 +- lib/CodeGen/MachineCSE.cpp | 12 +- lib/CodeGen/MachineCopyPropagation.cpp | 33 +- lib/CodeGen/MachineFunction.cpp | 173 +- lib/CodeGen/MachineFunctionPass.cpp | 2 +- lib/CodeGen/MachineFunctionPrinterPass.cpp | 4 +- lib/CodeGen/MachineInstr.cpp | 470 +- lib/CodeGen/MachineInstrBundle.cpp | 28 +- lib/CodeGen/MachineLICM.cpp | 20 +- lib/CodeGen/MachineLoopInfo.cpp | 2 +- lib/CodeGen/MachineLoopRanges.cpp | 116 - lib/CodeGen/MachineModuleInfo.cpp | 56 +- lib/CodeGen/MachineRegisterInfo.cpp | 63 +- 
lib/CodeGen/MachineSSAUpdater.cpp | 22 +- lib/CodeGen/MachineScheduler.cpp | 688 ++- lib/CodeGen/MachineSink.cpp | 14 +- lib/CodeGen/MachineTraceMetrics.cpp | 175 +- lib/CodeGen/MachineTraceMetrics.h | 350 -- lib/CodeGen/MachineVerifier.cpp | 45 +- lib/CodeGen/OptimizePHIs.cpp | 6 +- lib/CodeGen/PHIElimination.cpp | 270 +- lib/CodeGen/PHIEliminationUtils.cpp | 2 +- lib/CodeGen/Passes.cpp | 77 +- lib/CodeGen/PeepholeOptimizer.cpp | 21 +- lib/CodeGen/PostRASchedulerList.cpp | 68 +- lib/CodeGen/PrologEpilogInserter.cpp | 213 +- lib/CodeGen/PrologEpilogInserter.h | 6 +- lib/CodeGen/PseudoSourceValue.cpp | 8 +- lib/CodeGen/RegAllocBase.cpp | 4 +- lib/CodeGen/RegAllocBase.h | 4 +- lib/CodeGen/RegAllocBasic.cpp | 16 +- lib/CodeGen/RegAllocFast.cpp | 141 +- lib/CodeGen/RegAllocGreedy.cpp | 39 +- lib/CodeGen/RegAllocPBQP.cpp | 12 +- lib/CodeGen/RegisterClassInfo.cpp | 34 +- lib/CodeGen/RegisterCoalescer.cpp | 333 +- lib/CodeGen/RegisterPressure.cpp | 547 +- lib/CodeGen/RegisterScavenging.cpp | 138 +- lib/CodeGen/ScheduleDAG.cpp | 115 +- lib/CodeGen/ScheduleDAGInstrs.cpp | 727 ++- lib/CodeGen/ScheduleDAGPrinter.cpp | 16 +- lib/CodeGen/SelectionDAG/DAGCombiner.cpp | 650 +- lib/CodeGen/SelectionDAG/FastISel.cpp | 80 +- lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp | 35 +- lib/CodeGen/SelectionDAG/InstrEmitter.cpp | 168 +- lib/CodeGen/SelectionDAG/InstrEmitter.h | 16 +- lib/CodeGen/SelectionDAG/LegalizeDAG.cpp | 299 +- lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp | 568 +- lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp | 81 +- lib/CodeGen/SelectionDAG/LegalizeTypes.cpp | 73 +- lib/CodeGen/SelectionDAG/LegalizeTypes.h | 29 +- lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp | 2 +- lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp | 190 +- lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp | 78 +- lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp | 26 +- lib/CodeGen/SelectionDAG/SDNodeDbgValue.h | 2 +- lib/CodeGen/SelectionDAG/SDNodeOrdering.h | 6 +- lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp | 18 +- lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp | 149 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp | 22 +- lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h | 6 +- lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp | 14 +- lib/CodeGen/SelectionDAG/SelectionDAG.cpp | 425 +- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp | 697 +-- lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h | 15 +- lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp | 17 +- lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp | 229 +- lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp | 14 +- lib/CodeGen/SelectionDAG/TargetLowering.cpp | 1294 +--- lib/CodeGen/ShadowStackGC.cpp | 8 +- lib/CodeGen/ShrinkWrapping.cpp | 19 +- lib/CodeGen/SjLjEHPrepare.cpp | 39 +- lib/CodeGen/SlotIndexes.cpp | 70 + lib/CodeGen/SpillPlacement.cpp | 1 + lib/CodeGen/Spiller.cpp | 6 +- lib/CodeGen/SplitKit.cpp | 2 +- lib/CodeGen/StackColoring.cpp | 175 +- lib/CodeGen/StackProtector.cpp | 147 +- lib/CodeGen/StackSlotColoring.cpp | 10 +- lib/CodeGen/StrongPHIElimination.cpp | 8 +- lib/CodeGen/TailDuplication.cpp | 29 +- lib/CodeGen/TargetFrameLoweringImpl.cpp | 3 +- lib/CodeGen/TargetInstrInfo.cpp | 739 +++ lib/CodeGen/TargetInstrInfoImpl.cpp | 681 --- lib/CodeGen/TargetLoweringBase.cpp | 1305 ++++ lib/CodeGen/TargetLoweringObjectFileImpl.cpp | 152 +- lib/CodeGen/TargetRegisterInfo.cpp | 285 + lib/CodeGen/TargetSchedule.cpp | 9 +- lib/CodeGen/TwoAddressInstructionPass.cpp | 593 +- lib/CodeGen/UnreachableBlockElim.cpp | 16 +- lib/CodeGen/VirtRegMap.cpp | 37 +- 
lib/CodeGen/VirtRegMap.h | 190 - lib/DebugInfo/CMakeLists.txt | 1 + lib/DebugInfo/DIContext.cpp | 13 +- lib/DebugInfo/DWARFAbbreviationDeclaration.cpp | 18 +- lib/DebugInfo/DWARFAbbreviationDeclaration.h | 10 +- lib/DebugInfo/DWARFCompileUnit.cpp | 15 +- lib/DebugInfo/DWARFCompileUnit.h | 27 +- lib/DebugInfo/DWARFContext.cpp | 383 +- lib/DebugInfo/DWARFContext.h | 117 +- lib/DebugInfo/DWARFDebugArangeSet.cpp | 32 +- lib/DebugInfo/DWARFDebugArangeSet.h | 8 +- lib/DebugInfo/DWARFDebugAranges.cpp | 51 +- lib/DebugInfo/DWARFDebugAranges.h | 8 +- lib/DebugInfo/DWARFDebugFrame.cpp | 391 ++ lib/DebugInfo/DWARFDebugFrame.h | 46 + lib/DebugInfo/DWARFDebugInfoEntry.cpp | 30 +- lib/DebugInfo/DWARFDebugLine.cpp | 92 +- lib/DebugInfo/DWARFDebugLine.h | 8 + lib/DebugInfo/DWARFFormValue.cpp | 95 +- lib/DebugInfo/DWARFFormValue.h | 4 + lib/DebugInfo/DWARFRelocMap.h | 22 + lib/ExecutionEngine/EventListenerCommon.h | 6 +- lib/ExecutionEngine/ExecutionEngine.cpp | 197 +- lib/ExecutionEngine/ExecutionEngineBindings.cpp | 2 +- .../IntelJITEvents/IntelJITEventListener.cpp | 115 +- .../IntelJITEvents/IntelJITEventsWrapper.h | 6 - lib/ExecutionEngine/Interpreter/Execution.cpp | 53 +- .../Interpreter/ExternalFunctions.cpp | 12 +- lib/ExecutionEngine/Interpreter/Interpreter.cpp | 4 +- lib/ExecutionEngine/Interpreter/Interpreter.h | 7 +- lib/ExecutionEngine/JIT/JIT.cpp | 23 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp | 10 +- lib/ExecutionEngine/JIT/JITDwarfEmitter.h | 4 + lib/ExecutionEngine/JIT/JITEmitter.cpp | 60 +- lib/ExecutionEngine/JIT/JITMemoryManager.cpp | 28 +- lib/ExecutionEngine/MCJIT/CMakeLists.txt | 1 + lib/ExecutionEngine/MCJIT/LLVMBuild.txt | 2 +- lib/ExecutionEngine/MCJIT/MCJIT.cpp | 17 +- lib/ExecutionEngine/MCJIT/MCJIT.h | 2 +- lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp | 226 + .../OProfileJIT/OProfileJITEventListener.cpp | 2 +- .../OProfileJIT/OProfileWrapper.cpp | 1 + lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp | 8 +- .../RuntimeDyld/ObjectImageCommon.h | 154 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp | 49 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp | 126 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h | 4 +- .../RuntimeDyld/RuntimeDyldMachO.cpp | 5 +- lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h | 2 +- lib/ExecutionEngine/TargetSelect.cpp | 9 +- lib/IR/AsmWriter.cpp | 2236 +++++++ lib/IR/AttributeImpl.h | 278 + lib/IR/Attributes.cpp | 1180 ++++ lib/IR/AutoUpgrade.cpp | 393 ++ lib/IR/BasicBlock.cpp | 371 ++ lib/IR/CMakeLists.txt | 51 + lib/IR/ConstantFold.cpp | 2074 +++++++ lib/IR/ConstantFold.h | 56 + lib/IR/Constants.cpp | 2779 +++++++++ lib/IR/ConstantsContext.h | 774 +++ lib/IR/Core.cpp | 2458 ++++++++ lib/IR/DIBuilder.cpp | 1101 ++++ lib/IR/DataLayout.cpp | 697 +++ lib/IR/DebugInfo.cpp | 1209 ++++ lib/IR/DebugLoc.cpp | 315 + lib/IR/Dominators.cpp | 302 + lib/IR/Function.cpp | 707 +++ lib/IR/GCOV.cpp | 283 + lib/IR/GVMaterializer.cpp | 18 + lib/IR/Globals.cpp | 269 + lib/IR/IRBuilder.cpp | 153 + lib/IR/InlineAsm.cpp | 295 + lib/IR/Instruction.cpp | 555 ++ lib/IR/Instructions.cpp | 3553 +++++++++++ lib/IR/IntrinsicInst.cpp | 73 + lib/IR/LLVMBuild.txt | 22 + lib/IR/LLVMContext.cpp | 168 + lib/IR/LLVMContextImpl.cpp | 156 + lib/IR/LLVMContextImpl.h | 367 ++ lib/IR/LeakDetector.cpp | 69 + lib/IR/LeaksContext.h | 92 + lib/IR/Makefile | 33 + lib/IR/Metadata.cpp | 745 +++ lib/IR/Module.cpp | 451 ++ lib/IR/Pass.cpp | 276 + lib/IR/PassManager.cpp | 1912 ++++++ lib/IR/PassRegistry.cpp | 209 + lib/IR/PrintModulePass.cpp | 136 + 
lib/IR/SymbolTableListTraitsImpl.h | 118 + lib/IR/Type.cpp | 767 +++ lib/IR/TypeFinder.cpp | 148 + lib/IR/Use.cpp | 145 + lib/IR/User.cpp | 90 + lib/IR/Value.cpp | 701 +++ lib/IR/ValueSymbolTable.cpp | 117 + lib/IR/ValueTypes.cpp | 277 + lib/IR/Verifier.cpp | 2144 +++++++ lib/IRReader/CMakeLists.txt | 3 + lib/IRReader/IRReader.cpp | 89 + lib/IRReader/LLVMBuild.txt | 22 + lib/IRReader/Makefile | 14 + lib/LLVMBuild.txt | 2 +- lib/Linker/CMakeLists.txt | 2 - lib/Linker/LLVMBuild.txt | 2 +- lib/Linker/LinkArchives.cpp | 197 - lib/Linker/LinkItems.cpp | 241 - lib/Linker/LinkModules.cpp | 303 +- lib/Linker/Linker.cpp | 113 +- lib/MC/CMakeLists.txt | 1 - lib/MC/ELFObjectWriter.cpp | 81 +- lib/MC/MCAsmInfo.cpp | 3 + lib/MC/MCAsmStreamer.cpp | 106 +- lib/MC/MCAssembler.cpp | 431 +- lib/MC/MCContext.cpp | 76 +- lib/MC/MCDisassembler/CMakeLists.txt | 5 - lib/MC/MCDisassembler/Disassembler.cpp | 87 +- lib/MC/MCDisassembler/Disassembler.h | 6 +- lib/MC/MCDisassembler/EDDisassembler.cpp | 400 -- lib/MC/MCDisassembler/EDDisassembler.h | 271 - lib/MC/MCDisassembler/EDInfo.h | 84 - lib/MC/MCDisassembler/EDInst.cpp | 212 - lib/MC/MCDisassembler/EDInst.h | 182 - lib/MC/MCDisassembler/EDMain.cpp | 276 - lib/MC/MCDisassembler/EDOperand.cpp | 315 - lib/MC/MCDisassembler/EDOperand.h | 91 - lib/MC/MCDisassembler/EDToken.cpp | 214 - lib/MC/MCDisassembler/EDToken.h | 139 - lib/MC/MCDwarf.cpp | 294 +- lib/MC/MCELF.cpp | 17 +- lib/MC/MCELF.h | 35 - lib/MC/MCELFObjectTargetWriter.cpp | 5 - lib/MC/MCELFStreamer.cpp | 341 +- lib/MC/MCExpr.cpp | 23 +- lib/MC/MCInstPrinter.cpp | 13 +- lib/MC/MCMachOStreamer.cpp | 52 +- lib/MC/MCNullStreamer.cpp | 21 +- lib/MC/MCObjectFileInfo.cpp | 79 +- lib/MC/MCObjectStreamer.cpp | 101 +- lib/MC/MCParser/AsmLexer.cpp | 84 +- lib/MC/MCParser/AsmParser.cpp | 3081 ++++++---- lib/MC/MCParser/COFFAsmParser.cpp | 83 +- lib/MC/MCParser/DarwinAsmParser.cpp | 251 +- lib/MC/MCParser/ELFAsmParser.cpp | 103 +- lib/MC/MCParser/MCAsmLexer.cpp | 2 +- lib/MC/MCParser/MCAsmParser.cpp | 8 +- lib/MC/MCPureStreamer.cpp | 36 +- lib/MC/MCSection.cpp | 2 +- lib/MC/MCSectionMachO.cpp | 4 +- lib/MC/MCStreamer.cpp | 159 +- lib/MC/MCSubtargetInfo.cpp | 4 +- lib/MC/MCTargetAsmLexer.cpp | 16 - lib/MC/MCWin64EH.cpp | 8 +- lib/MC/MachObjectWriter.cpp | 72 +- lib/MC/WinCOFFObjectWriter.cpp | 75 +- lib/MC/WinCOFFStreamer.cpp | 126 +- lib/Makefile | 5 +- lib/Object/Archive.cpp | 279 +- lib/Object/COFFObjectFile.cpp | 2 +- lib/Object/ELFObjectFile.cpp | 46 +- lib/Object/MachOObject.cpp | 18 +- lib/Object/MachOObjectFile.cpp | 104 +- lib/Object/ObjectFile.cpp | 4 +- lib/Option/Arg.cpp | 122 + lib/Option/ArgList.cpp | 385 ++ lib/Option/CMakeLists.txt | 8 + lib/Option/LLVMBuild.txt | 22 + lib/Option/Makefile | 14 + lib/Option/OptTable.cpp | 387 ++ lib/Option/Option.cpp | 202 + lib/Support/APFloat.cpp | 96 +- lib/Support/APInt.cpp | 52 +- lib/Support/Allocator.cpp | 10 +- lib/Support/CMakeLists.txt | 6 + lib/Support/CommandLine.cpp | 24 +- lib/Support/ConstantRange.cpp | 2 +- lib/Support/ConvertUTF.c | 571 ++ lib/Support/ConvertUTFWrapper.cpp | 76 + lib/Support/CrashRecoveryContext.cpp | 4 +- lib/Support/DataStream.cpp | 4 +- lib/Support/Debug.cpp | 6 +- lib/Support/Disassembler.cpp | 5 +- lib/Support/Dwarf.cpp | 19 +- lib/Support/DynamicLibrary.cpp | 8 +- lib/Support/ErrorHandling.cpp | 24 +- lib/Support/FileOutputBuffer.cpp | 83 +- lib/Support/FileUtilities.cpp | 12 +- lib/Support/FoldingSet.cpp | 10 +- lib/Support/GraphWriter.cpp | 15 +- lib/Support/Host.cpp | 107 +- lib/Support/LocaleWindows.inc | 2 +- 
lib/Support/LocaleXlocale.inc | 2 +- lib/Support/LockFileManager.cpp | 22 +- lib/Support/Memory.cpp | 2 +- lib/Support/MemoryBuffer.cpp | 105 +- lib/Support/Path.cpp | 3 +- lib/Support/PathV2.cpp | 29 +- lib/Support/PluginLoader.cpp | 4 +- lib/Support/PrettyStackTrace.cpp | 12 +- lib/Support/Process.cpp | 60 +- lib/Support/Program.cpp | 11 +- lib/Support/Regex.cpp | 8 +- lib/Support/SmallPtrSet.cpp | 24 +- lib/Support/SourceMgr.cpp | 197 +- lib/Support/Statistic.cpp | 22 +- lib/Support/StringRef.cpp | 3 +- lib/Support/Threading.cpp | 2 +- lib/Support/TimeValue.cpp | 9 +- lib/Support/Timer.cpp | 8 +- lib/Support/Triple.cpp | 24 +- lib/Support/Unix/Memory.inc | 36 +- lib/Support/Unix/PathV2.inc | 43 +- lib/Support/Unix/Process.inc | 101 +- lib/Support/Unix/Program.inc | 32 +- lib/Support/Unix/Signals.inc | 44 +- lib/Support/Unix/TimeValue.inc | 3 +- lib/Support/Unix/Unix.h | 6 +- lib/Support/Unix/Watchdog.inc | 32 + lib/Support/Watchdog.cpp | 23 + lib/Support/Windows/Memory.inc | 2 + lib/Support/Windows/Path.inc | 4 +- lib/Support/Windows/PathV2.inc | 65 +- lib/Support/Windows/Process.inc | 87 +- lib/Support/Windows/Program.inc | 26 +- lib/Support/Windows/Signals.inc | 6 +- lib/Support/Windows/Watchdog.inc | 24 + lib/Support/YAMLParser.cpp | 27 +- lib/Support/YAMLTraits.cpp | 827 +++ lib/Support/raw_ostream.cpp | 22 +- lib/Support/regcomp.c | 30 +- lib/Support/system_error.cpp | 2 +- lib/TableGen/Error.cpp | 7 +- lib/TableGen/Main.cpp | 15 +- lib/TableGen/Record.cpp | 66 +- lib/TableGen/TGLexer.cpp | 22 +- lib/TableGen/TGLexer.h | 17 +- lib/TableGen/TGParser.cpp | 255 +- lib/TableGen/TGParser.h | 10 +- lib/TableGen/TableGenBackend.cpp | 31 +- lib/Target/AArch64/AArch64.h | 42 + lib/Target/AArch64/AArch64.td | 70 + lib/Target/AArch64/AArch64AsmPrinter.cpp | 347 ++ lib/Target/AArch64/AArch64AsmPrinter.h | 80 + lib/Target/AArch64/AArch64BranchFixupPass.cpp | 600 ++ lib/Target/AArch64/AArch64CallingConv.td | 196 + lib/Target/AArch64/AArch64FrameLowering.cpp | 633 ++ lib/Target/AArch64/AArch64FrameLowering.h | 108 + lib/Target/AArch64/AArch64ISelDAGToDAG.cpp | 415 ++ lib/Target/AArch64/AArch64ISelLowering.cpp | 2975 +++++++++ lib/Target/AArch64/AArch64ISelLowering.h | 247 + lib/Target/AArch64/AArch64InstrFormats.td | 961 +++ lib/Target/AArch64/AArch64InstrInfo.cpp | 822 +++ lib/Target/AArch64/AArch64InstrInfo.h | 112 + lib/Target/AArch64/AArch64InstrInfo.td | 5099 ++++++++++++++++ lib/Target/AArch64/AArch64MCInstLower.cpp | 140 + lib/Target/AArch64/AArch64MachineFunctionInfo.cpp | 18 + lib/Target/AArch64/AArch64MachineFunctionInfo.h | 149 + lib/Target/AArch64/AArch64RegisterInfo.cpp | 171 + lib/Target/AArch64/AArch64RegisterInfo.h | 76 + lib/Target/AArch64/AArch64RegisterInfo.td | 203 + lib/Target/AArch64/AArch64Schedule.td | 10 + lib/Target/AArch64/AArch64SelectionDAGInfo.cpp | 25 + lib/Target/AArch64/AArch64SelectionDAGInfo.h | 32 + lib/Target/AArch64/AArch64Subtarget.cpp | 43 + lib/Target/AArch64/AArch64Subtarget.h | 54 + lib/Target/AArch64/AArch64TargetMachine.cpp | 81 + lib/Target/AArch64/AArch64TargetMachine.h | 69 + lib/Target/AArch64/AArch64TargetObjectFile.cpp | 24 + lib/Target/AArch64/AArch64TargetObjectFile.h | 31 + lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp | 2197 +++++++ lib/Target/AArch64/AsmParser/CMakeLists.txt | 7 + lib/Target/AArch64/AsmParser/LLVMBuild.txt | 24 + lib/Target/AArch64/AsmParser/Makefile | 15 + lib/Target/AArch64/CMakeLists.txt | 36 + .../AArch64/Disassembler/AArch64Disassembler.cpp | 803 +++ lib/Target/AArch64/Disassembler/CMakeLists.txt | 7 + 
lib/Target/AArch64/Disassembler/LLVMBuild.txt | 24 + lib/Target/AArch64/Disassembler/Makefile | 16 + .../AArch64/InstPrinter/AArch64InstPrinter.cpp | 408 ++ .../AArch64/InstPrinter/AArch64InstPrinter.h | 172 + lib/Target/AArch64/InstPrinter/CMakeLists.txt | 8 + lib/Target/AArch64/InstPrinter/LLVMBuild.txt | 24 + lib/Target/AArch64/InstPrinter/Makefile | 15 + lib/Target/AArch64/LLVMBuild.txt | 36 + .../AArch64/MCTargetDesc/AArch64AsmBackend.cpp | 585 ++ .../MCTargetDesc/AArch64ELFObjectWriter.cpp | 292 + .../AArch64/MCTargetDesc/AArch64ELFStreamer.cpp | 160 + .../AArch64/MCTargetDesc/AArch64ELFStreamer.h | 27 + .../AArch64/MCTargetDesc/AArch64FixupKinds.h | 113 + .../AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp | 41 + lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h | 27 + .../AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp | 502 ++ lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp | 178 + lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h | 167 + .../AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp | 194 + .../AArch64/MCTargetDesc/AArch64MCTargetDesc.h | 65 + lib/Target/AArch64/MCTargetDesc/CMakeLists.txt | 13 + lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt | 24 + lib/Target/AArch64/MCTargetDesc/Makefile | 16 + lib/Target/AArch64/Makefile | 30 + lib/Target/AArch64/README.txt | 2 + .../AArch64/TargetInfo/AArch64TargetInfo.cpp | 24 + lib/Target/AArch64/TargetInfo/CMakeLists.txt | 7 + lib/Target/AArch64/TargetInfo/LLVMBuild.txt | 24 + lib/Target/AArch64/TargetInfo/Makefile | 15 + lib/Target/AArch64/Utils/AArch64BaseInfo.cpp | 1103 ++++ lib/Target/AArch64/Utils/AArch64BaseInfo.h | 1068 ++++ lib/Target/AArch64/Utils/CMakeLists.txt | 5 + lib/Target/AArch64/Utils/LLVMBuild.txt | 23 + lib/Target/AArch64/Utils/Makefile | 15 + lib/Target/ARM/A15SDOptimizer.cpp | 704 +++ lib/Target/ARM/ARM.h | 4 + lib/Target/ARM/ARM.td | 35 +- lib/Target/ARM/ARMAsmPrinter.cpp | 701 +-- lib/Target/ARM/ARMAsmPrinter.h | 10 +- lib/Target/ARM/ARMBaseInstrInfo.cpp | 71 +- lib/Target/ARM/ARMBaseInstrInfo.h | 8 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 431 +- lib/Target/ARM/ARMBaseRegisterInfo.h | 23 +- lib/Target/ARM/ARMCallingConv.h | 2 +- lib/Target/ARM/ARMCodeEmitter.cpp | 26 +- lib/Target/ARM/ARMConstantIslandPass.cpp | 18 +- lib/Target/ARM/ARMConstantPoolValue.cpp | 29 +- lib/Target/ARM/ARMConstantPoolValue.h | 6 +- lib/Target/ARM/ARMExpandPseudoInsts.cpp | 4 +- lib/Target/ARM/ARMFastISel.cpp | 297 +- lib/Target/ARM/ARMFrameLowering.cpp | 126 +- lib/Target/ARM/ARMFrameLowering.h | 5 + lib/Target/ARM/ARMISelDAGToDAG.cpp | 445 +- lib/Target/ARM/ARMISelLowering.cpp | 699 ++- lib/Target/ARM/ARMISelLowering.h | 38 +- lib/Target/ARM/ARMInstrInfo.cpp | 4 +- lib/Target/ARM/ARMInstrInfo.td | 169 +- lib/Target/ARM/ARMInstrNEON.td | 24 +- lib/Target/ARM/ARMInstrThumb2.td | 26 +- lib/Target/ARM/ARMJITInfo.cpp | 4 +- lib/Target/ARM/ARMJITInfo.h | 4 +- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 200 +- lib/Target/ARM/ARMMCInstLower.cpp | 2 +- lib/Target/ARM/ARMMachineFunctionInfo.h | 4 +- lib/Target/ARM/ARMSchedule.td | 71 + lib/Target/ARM/ARMScheduleA9.td | 56 +- lib/Target/ARM/ARMScheduleSwift.td | 61 +- lib/Target/ARM/ARMSelectionDAGInfo.cpp | 2 +- lib/Target/ARM/ARMSubtarget.cpp | 126 +- lib/Target/ARM/ARMSubtarget.h | 34 +- lib/Target/ARM/ARMTargetMachine.cpp | 32 +- lib/Target/ARM/ARMTargetMachine.h | 38 +- lib/Target/ARM/ARMTargetObjectFile.cpp | 15 +- lib/Target/ARM/ARMTargetObjectFile.h | 5 + lib/Target/ARM/ARMTargetTransformInfo.cpp | 458 ++ lib/Target/ARM/AsmParser/ARMAsmLexer.cpp | 138 - lib/Target/ARM/AsmParser/ARMAsmParser.cpp 
| 645 +- lib/Target/ARM/AsmParser/CMakeLists.txt | 1 - lib/Target/ARM/CMakeLists.txt | 3 +- lib/Target/ARM/Disassembler/ARMDisassembler.cpp | 42 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.cpp | 59 +- lib/Target/ARM/InstPrinter/ARMInstPrinter.h | 1 + lib/Target/ARM/LICENSE.TXT | 47 + lib/Target/ARM/MCTargetDesc/ARMAsmBackend.cpp | 153 +- lib/Target/ARM/MCTargetDesc/ARMELFObjectWriter.cpp | 23 +- lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp | 418 ++ lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h | 27 + lib/Target/ARM/MCTargetDesc/ARMMCCodeEmitter.cpp | 23 +- lib/Target/ARM/MCTargetDesc/ARMMCExpr.cpp | 2 +- lib/Target/ARM/MCTargetDesc/ARMMCExpr.h | 3 + lib/Target/ARM/MCTargetDesc/ARMMCTargetDesc.cpp | 20 +- .../ARM/MCTargetDesc/ARMMachObjectWriter.cpp | 5 +- lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h | 112 + lib/Target/ARM/MCTargetDesc/CMakeLists.txt | 1 + lib/Target/ARM/MLxExpansionPass.cpp | 8 +- lib/Target/ARM/Makefile | 2 +- lib/Target/ARM/README-Thumb.txt | 2 - lib/Target/ARM/TargetInfo/ARMTargetInfo.cpp | 2 +- lib/Target/ARM/Thumb1FrameLowering.cpp | 48 +- lib/Target/ARM/Thumb1FrameLowering.h | 4 + lib/Target/ARM/Thumb1InstrInfo.cpp | 2 +- lib/Target/ARM/Thumb1RegisterInfo.cpp | 91 +- lib/Target/ARM/Thumb1RegisterInfo.h | 8 +- lib/Target/ARM/Thumb2ITBlockPass.cpp | 6 +- lib/Target/ARM/Thumb2InstrInfo.cpp | 6 +- lib/Target/ARM/Thumb2RegisterInfo.cpp | 6 +- lib/Target/ARM/Thumb2SizeReduction.cpp | 383 +- lib/Target/CMakeLists.txt | 3 - lib/Target/CellSPU/CMakeLists.txt | 30 - lib/Target/CellSPU/CellSDKIntrinsics.td | 449 -- lib/Target/CellSPU/LLVMBuild.txt | 32 - lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt | 6 - lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt | 23 - lib/Target/CellSPU/MCTargetDesc/Makefile | 16 - lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp | 43 - lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h | 30 - .../CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp | 94 - lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h | 38 - lib/Target/CellSPU/Makefile | 20 - lib/Target/CellSPU/README.txt | 106 - lib/Target/CellSPU/SPU.h | 31 - lib/Target/CellSPU/SPU.td | 66 - lib/Target/CellSPU/SPU128InstrInfo.td | 41 - lib/Target/CellSPU/SPU64InstrInfo.td | 408 -- lib/Target/CellSPU/SPUAsmPrinter.cpp | 333 - lib/Target/CellSPU/SPUCallingConv.td | 53 - lib/Target/CellSPU/SPUFrameLowering.cpp | 256 - lib/Target/CellSPU/SPUFrameLowering.h | 80 - lib/Target/CellSPU/SPUHazardRecognizers.cpp | 135 - lib/Target/CellSPU/SPUHazardRecognizers.h | 37 - lib/Target/CellSPU/SPUISelDAGToDAG.cpp | 1192 ---- lib/Target/CellSPU/SPUISelLowering.cpp | 3266 ---------- lib/Target/CellSPU/SPUISelLowering.h | 178 - lib/Target/CellSPU/SPUInstrBuilder.h | 43 - lib/Target/CellSPU/SPUInstrFormats.td | 320 - lib/Target/CellSPU/SPUInstrInfo.cpp | 449 -- lib/Target/CellSPU/SPUInstrInfo.h | 84 - lib/Target/CellSPU/SPUInstrInfo.td | 4484 -------------- lib/Target/CellSPU/SPUMachineFunction.cpp | 14 - lib/Target/CellSPU/SPUMachineFunction.h | 50 - lib/Target/CellSPU/SPUMathInstr.td | 97 - lib/Target/CellSPU/SPUNodes.td | 159 - lib/Target/CellSPU/SPUNopFiller.cpp | 153 - lib/Target/CellSPU/SPUOperands.td | 664 -- lib/Target/CellSPU/SPURegisterInfo.cpp | 357 -- lib/Target/CellSPU/SPURegisterInfo.h | 106 - lib/Target/CellSPU/SPURegisterInfo.td | 183 - lib/Target/CellSPU/SPURegisterNames.h | 19 - lib/Target/CellSPU/SPUSchedule.td | 59 - lib/Target/CellSPU/SPUSelectionDAGInfo.cpp | 23 - lib/Target/CellSPU/SPUSelectionDAGInfo.h | 31 - lib/Target/CellSPU/SPUSubtarget.cpp | 65 - lib/Target/CellSPU/SPUSubtarget.h | 97 - 
lib/Target/CellSPU/SPUTargetMachine.cpp | 94 - lib/Target/CellSPU/SPUTargetMachine.h | 96 - lib/Target/CellSPU/TargetInfo/CMakeLists.txt | 7 - .../CellSPU/TargetInfo/CellSPUTargetInfo.cpp | 20 - lib/Target/CellSPU/TargetInfo/LLVMBuild.txt | 23 - lib/Target/CellSPU/TargetInfo/Makefile | 15 - lib/Target/CppBackend/CPPBackend.cpp | 111 +- lib/Target/CppBackend/CPPTargetMachine.h | 2 +- .../CppBackend/TargetInfo/CppBackendTargetInfo.cpp | 2 +- lib/Target/Hexagon/CMakeLists.txt | 5 +- lib/Target/Hexagon/Hexagon.h | 8 +- lib/Target/Hexagon/Hexagon.td | 101 + lib/Target/Hexagon/HexagonAsmPrinter.cpp | 36 +- lib/Target/Hexagon/HexagonCFGOptimizer.cpp | 11 +- lib/Target/Hexagon/HexagonCallingConvLower.cpp | 6 +- lib/Target/Hexagon/HexagonCallingConvLower.h | 4 +- lib/Target/Hexagon/HexagonExpandPredSpillCode.cpp | 11 +- lib/Target/Hexagon/HexagonFixupHwLoops.cpp | 183 + lib/Target/Hexagon/HexagonFrameLowering.cpp | 32 +- lib/Target/Hexagon/HexagonFrameLowering.h | 5 + lib/Target/Hexagon/HexagonHardwareLoops.cpp | 1673 ++++-- lib/Target/Hexagon/HexagonISelDAGToDAG.cpp | 176 +- lib/Target/Hexagon/HexagonISelLowering.cpp | 105 +- lib/Target/Hexagon/HexagonISelLowering.h | 17 +- lib/Target/Hexagon/HexagonImmediates.td | 508 -- lib/Target/Hexagon/HexagonInstrFormats.td | 445 +- lib/Target/Hexagon/HexagonInstrFormatsV4.td | 65 +- lib/Target/Hexagon/HexagonInstrInfo.cpp | 1280 ++-- lib/Target/Hexagon/HexagonInstrInfo.h | 27 +- lib/Target/Hexagon/HexagonInstrInfo.td | 2352 +++----- lib/Target/Hexagon/HexagonInstrInfoV4.td | 6330 +++++++------------- lib/Target/Hexagon/HexagonMCInst.h | 41 - lib/Target/Hexagon/HexagonMCInstLower.cpp | 6 +- lib/Target/Hexagon/HexagonMachineScheduler.cpp | 19 +- lib/Target/Hexagon/HexagonMachineScheduler.h | 8 +- lib/Target/Hexagon/HexagonNewValueJump.cpp | 36 +- lib/Target/Hexagon/HexagonOperands.td | 858 +++ lib/Target/Hexagon/HexagonPeephole.cpp | 8 +- lib/Target/Hexagon/HexagonRegisterInfo.cpp | 159 +- lib/Target/Hexagon/HexagonRegisterInfo.h | 14 +- lib/Target/Hexagon/HexagonRemoveSZExtArgs.cpp | 9 +- lib/Target/Hexagon/HexagonSchedule.td | 24 +- lib/Target/Hexagon/HexagonScheduleV4.td | 14 +- lib/Target/Hexagon/HexagonSplitTFRCondSets.cpp | 13 +- lib/Target/Hexagon/HexagonSubtarget.cpp | 17 +- lib/Target/Hexagon/HexagonSubtarget.h | 2 +- lib/Target/Hexagon/HexagonTargetMachine.cpp | 60 +- lib/Target/Hexagon/HexagonTargetMachine.h | 19 +- lib/Target/Hexagon/HexagonTargetObjectFile.cpp | 10 +- lib/Target/Hexagon/HexagonVLIWPacketizer.cpp | 598 +- .../Hexagon/InstPrinter/HexagonInstPrinter.cpp | 42 +- .../Hexagon/InstPrinter/HexagonInstPrinter.h | 17 +- lib/Target/Hexagon/InstPrinter/LLVMBuild.txt | 2 +- lib/Target/Hexagon/MCTargetDesc/CMakeLists.txt | 3 +- lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h | 141 +- .../Hexagon/MCTargetDesc/HexagonMCAsmInfo.cpp | 1 + lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp | 175 + lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h | 100 + .../Hexagon/MCTargetDesc/HexagonMCTargetDesc.cpp | 2 + .../Hexagon/TargetInfo/HexagonTargetInfo.cpp | 2 +- lib/Target/LLVMBuild.txt | 2 +- lib/Target/MBlaze/AsmParser/CMakeLists.txt | 1 - lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp | 115 - lib/Target/MBlaze/AsmParser/MBlazeAsmParser.cpp | 111 +- lib/Target/MBlaze/CMakeLists.txt | 1 - .../MBlaze/Disassembler/MBlazeDisassembler.cpp | 9 +- .../MBlaze/Disassembler/MBlazeDisassembler.h | 5 - .../MBlaze/InstPrinter/MBlazeInstPrinter.cpp | 4 +- lib/Target/MBlaze/MBlazeAsmPrinter.cpp | 26 +- lib/Target/MBlaze/MBlazeDelaySlotFiller.cpp | 4 +- 
lib/Target/MBlaze/MBlazeFrameLowering.cpp | 47 +- lib/Target/MBlaze/MBlazeFrameLowering.h | 4 + lib/Target/MBlaze/MBlazeISelDAGToDAG.cpp | 14 +- lib/Target/MBlaze/MBlazeISelLowering.cpp | 48 +- lib/Target/MBlaze/MBlazeISelLowering.h | 2 +- lib/Target/MBlaze/MBlazeInstrInfo.cpp | 4 +- lib/Target/MBlaze/MBlazeInstrInfo.td | 4 +- lib/Target/MBlaze/MBlazeIntrinsicInfo.cpp | 14 +- lib/Target/MBlaze/MBlazeMCInstLower.cpp | 8 +- lib/Target/MBlaze/MBlazeMachineFunction.h | 2 +- lib/Target/MBlaze/MBlazeRegisterInfo.cpp | 86 +- lib/Target/MBlaze/MBlazeRegisterInfo.h | 10 +- lib/Target/MBlaze/MBlazeSubtarget.h | 2 +- lib/Target/MBlaze/MBlazeTargetMachine.cpp | 5 +- lib/Target/MBlaze/MBlazeTargetMachine.h | 20 +- lib/Target/MBlaze/MBlazeTargetObjectFile.cpp | 8 +- .../MBlaze/MCTargetDesc/MBlazeAsmBackend.cpp | 8 +- .../MBlaze/MCTargetDesc/MBlazeMCCodeEmitter.cpp | 6 +- .../MBlaze/MCTargetDesc/MBlazeMCTargetDesc.cpp | 2 +- lib/Target/MBlaze/Makefile | 3 +- lib/Target/MBlaze/TargetInfo/MBlazeTargetInfo.cpp | 2 +- .../MSP430/InstPrinter/MSP430InstPrinter.cpp | 4 +- lib/Target/MSP430/MCTargetDesc/MSP430MCAsmInfo.cpp | 2 +- .../MSP430/MCTargetDesc/MSP430MCTargetDesc.cpp | 2 +- lib/Target/MSP430/MSP430AsmPrinter.cpp | 14 +- lib/Target/MSP430/MSP430BranchSelector.cpp | 6 +- lib/Target/MSP430/MSP430CallingConv.td | 3 + lib/Target/MSP430/MSP430FrameLowering.cpp | 76 +- lib/Target/MSP430/MSP430FrameLowering.h | 7 +- lib/Target/MSP430/MSP430ISelDAGToDAG.cpp | 12 +- lib/Target/MSP430/MSP430ISelLowering.cpp | 127 +- lib/Target/MSP430/MSP430ISelLowering.h | 4 +- lib/Target/MSP430/MSP430InstrInfo.cpp | 2 +- lib/Target/MSP430/MSP430InstrInfo.td | 4 +- lib/Target/MSP430/MSP430MCInstLower.cpp | 6 +- lib/Target/MSP430/MSP430MachineFunctionInfo.h | 6 + lib/Target/MSP430/MSP430RegisterInfo.cpp | 85 +- lib/Target/MSP430/MSP430RegisterInfo.h | 7 +- lib/Target/MSP430/MSP430TargetMachine.cpp | 4 +- lib/Target/MSP430/MSP430TargetMachine.h | 17 +- lib/Target/MSP430/TargetInfo/MSP430TargetInfo.cpp | 2 +- lib/Target/Mangler.cpp | 10 +- lib/Target/Mips/AsmParser/MipsAsmParser.cpp | 844 ++- lib/Target/Mips/CMakeLists.txt | 6 +- lib/Target/Mips/Disassembler/LLVMBuild.txt | 2 +- lib/Target/Mips/Disassembler/Makefile | 2 +- lib/Target/Mips/Disassembler/MipsDisassembler.cpp | 50 +- lib/Target/Mips/InstPrinter/MipsInstPrinter.cpp | 10 +- lib/Target/Mips/InstPrinter/MipsInstPrinter.h | 3 + lib/Target/Mips/MCTargetDesc/CMakeLists.txt | 2 + lib/Target/Mips/MCTargetDesc/MipsAsmBackend.cpp | 3 +- lib/Target/Mips/MCTargetDesc/MipsBaseInfo.h | 93 - .../Mips/MCTargetDesc/MipsELFObjectWriter.cpp | 22 +- lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp | 89 + lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h | 43 + lib/Target/Mips/MCTargetDesc/MipsMCAsmInfo.cpp | 7 +- lib/Target/Mips/MCTargetDesc/MipsMCCodeEmitter.cpp | 29 +- lib/Target/Mips/MCTargetDesc/MipsMCTargetDesc.cpp | 7 +- lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp | 80 + lib/Target/Mips/MCTargetDesc/MipsReginfo.h | 31 + lib/Target/Mips/Makefile | 2 +- lib/Target/Mips/Mips.h | 1 + lib/Target/Mips/Mips.td | 16 +- lib/Target/Mips/Mips16FrameLowering.cpp | 71 +- lib/Target/Mips/Mips16FrameLowering.h | 6 +- lib/Target/Mips/Mips16ISelDAGToDAG.cpp | 308 + lib/Target/Mips/Mips16ISelDAGToDAG.h | 51 + lib/Target/Mips/Mips16ISelLowering.cpp | 689 +++ lib/Target/Mips/Mips16ISelLowering.h | 80 + lib/Target/Mips/Mips16InstrFormats.td | 111 +- lib/Target/Mips/Mips16InstrInfo.cpp | 264 +- lib/Target/Mips/Mips16InstrInfo.h | 66 +- lib/Target/Mips/Mips16InstrInfo.td | 443 +- 
lib/Target/Mips/Mips16RegisterInfo.cpp | 86 +- lib/Target/Mips/Mips16RegisterInfo.h | 20 +- lib/Target/Mips/Mips64InstrInfo.td | 370 +- lib/Target/Mips/MipsAsmPrinter.cpp | 77 +- lib/Target/Mips/MipsAsmPrinter.h | 3 +- lib/Target/Mips/MipsCallingConv.td | 18 +- lib/Target/Mips/MipsCodeEmitter.cpp | 180 +- lib/Target/Mips/MipsCondMov.td | 181 +- lib/Target/Mips/MipsConstantIslandPass.cpp | 85 + lib/Target/Mips/MipsDSPInstrFormats.td | 5 +- lib/Target/Mips/MipsDSPInstrInfo.td | 280 +- lib/Target/Mips/MipsDelaySlotFiller.cpp | 778 ++- lib/Target/Mips/MipsFrameLowering.cpp | 8 +- lib/Target/Mips/MipsFrameLowering.h | 7 +- lib/Target/Mips/MipsISelDAGToDAG.cpp | 690 +-- lib/Target/Mips/MipsISelDAGToDAG.h | 93 + lib/Target/Mips/MipsISelLowering.cpp | 1710 +++--- lib/Target/Mips/MipsISelLowering.h | 207 +- lib/Target/Mips/MipsInstrFPU.td | 514 +- lib/Target/Mips/MipsInstrFormats.td | 552 +- lib/Target/Mips/MipsInstrInfo.cpp | 170 +- lib/Target/Mips/MipsInstrInfo.h | 45 + lib/Target/Mips/MipsInstrInfo.td | 1161 ++-- lib/Target/Mips/MipsJITInfo.cpp | 4 +- lib/Target/Mips/MipsLongBranch.cpp | 54 +- lib/Target/Mips/MipsMCInstLower.cpp | 2 +- lib/Target/Mips/MipsMachineFunction.cpp | 20 +- lib/Target/Mips/MipsMachineFunction.h | 18 +- lib/Target/Mips/MipsRegisterInfo.cpp | 61 +- lib/Target/Mips/MipsRegisterInfo.h | 11 +- lib/Target/Mips/MipsRegisterInfo.td | 140 +- lib/Target/Mips/MipsSEFrameLowering.cpp | 258 +- lib/Target/Mips/MipsSEFrameLowering.h | 7 +- lib/Target/Mips/MipsSEISelDAGToDAG.cpp | 473 ++ lib/Target/Mips/MipsSEISelDAGToDAG.h | 57 + lib/Target/Mips/MipsSEISelLowering.cpp | 442 ++ lib/Target/Mips/MipsSEISelLowering.h | 62 + lib/Target/Mips/MipsSEInstrInfo.cpp | 87 +- lib/Target/Mips/MipsSEInstrInfo.h | 26 +- lib/Target/Mips/MipsSERegisterInfo.cpp | 59 +- lib/Target/Mips/MipsSERegisterInfo.h | 4 +- lib/Target/Mips/MipsSubtarget.cpp | 9 +- lib/Target/Mips/MipsSubtarget.h | 38 +- lib/Target/Mips/MipsTargetMachine.cpp | 17 +- lib/Target/Mips/MipsTargetMachine.h | 31 +- lib/Target/Mips/MipsTargetObjectFile.cpp | 22 +- lib/Target/Mips/MipsTargetObjectFile.h | 2 + lib/Target/Mips/TargetInfo/MipsTargetInfo.cpp | 2 +- lib/Target/NVPTX/CMakeLists.txt | 2 +- lib/Target/NVPTX/MCTargetDesc/NVPTXBaseInfo.h | 38 +- lib/Target/NVPTX/MCTargetDesc/NVPTXMCAsmInfo.cpp | 12 +- .../NVPTX/MCTargetDesc/NVPTXMCTargetDesc.cpp | 11 +- lib/Target/NVPTX/ManagedStringPool.h | 1 - lib/Target/NVPTX/NVPTX.h | 34 +- lib/Target/NVPTX/NVPTX.td | 12 - lib/Target/NVPTX/NVPTXAllocaHoisting.cpp | 22 +- lib/Target/NVPTX/NVPTXAllocaHoisting.h | 2 +- lib/Target/NVPTX/NVPTXAsmPrinter.cpp | 998 ++- lib/Target/NVPTX/NVPTXAsmPrinter.h | 119 +- lib/Target/NVPTX/NVPTXFrameLowering.cpp | 54 +- lib/Target/NVPTX/NVPTXFrameLowering.h | 12 +- lib/Target/NVPTX/NVPTXISelDAGToDAG.cpp | 1642 ++++- lib/Target/NVPTX/NVPTXISelDAGToDAG.h | 17 +- lib/Target/NVPTX/NVPTXISelLowering.cpp | 1260 ++-- lib/Target/NVPTX/NVPTXISelLowering.h | 47 +- lib/Target/NVPTX/NVPTXInstrInfo.cpp | 143 +- lib/Target/NVPTX/NVPTXInstrInfo.h | 31 +- lib/Target/NVPTX/NVPTXInstrInfo.td | 96 +- lib/Target/NVPTX/NVPTXIntrinsics.td | 145 +- lib/Target/NVPTX/NVPTXLowerAggrCopies.cpp | 71 +- lib/Target/NVPTX/NVPTXLowerAggrCopies.h | 4 +- lib/Target/NVPTX/NVPTXNumRegisters.h | 6 +- lib/Target/NVPTX/NVPTXRegisterInfo.cpp | 264 +- lib/Target/NVPTX/NVPTXRegisterInfo.h | 31 +- lib/Target/NVPTX/NVPTXRegisterInfo.td | 44 - lib/Target/NVPTX/NVPTXSection.h | 4 +- lib/Target/NVPTX/NVPTXSplitBBatBar.cpp | 20 +- lib/Target/NVPTX/NVPTXSplitBBatBar.h | 2 +- lib/Target/NVPTX/NVPTXSubtarget.cpp | 
22 +- lib/Target/NVPTX/NVPTXSubtarget.h | 14 +- lib/Target/NVPTX/NVPTXTargetMachine.cpp | 85 +- lib/Target/NVPTX/NVPTXTargetMachine.h | 59 +- lib/Target/NVPTX/NVPTXTargetObjectFile.h | 77 +- lib/Target/NVPTX/NVPTXUtilities.cpp | 118 +- lib/Target/NVPTX/NVPTXUtilities.h | 18 +- lib/Target/NVPTX/NVPTXutil.cpp | 32 +- lib/Target/NVPTX/NVVMReflect.cpp | 177 + lib/Target/NVPTX/TargetInfo/NVPTXTargetInfo.cpp | 6 +- lib/Target/NVPTX/VectorElementize.cpp | 1248 ---- lib/Target/NVPTX/cl_common_defines.h | 123 +- lib/Target/NVPTX/gen-register-defs.py | 202 - lib/Target/PowerPC/CMakeLists.txt | 1 + lib/Target/PowerPC/InstPrinter/PPCInstPrinter.cpp | 30 +- lib/Target/PowerPC/MCTargetDesc/PPCAsmBackend.cpp | 56 +- lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h | 70 - .../PowerPC/MCTargetDesc/PPCELFObjectWriter.cpp | 124 +- lib/Target/PowerPC/MCTargetDesc/PPCFixupKinds.h | 21 +- lib/Target/PowerPC/MCTargetDesc/PPCMCAsmInfo.cpp | 10 +- .../PowerPC/MCTargetDesc/PPCMCCodeEmitter.cpp | 75 +- .../PowerPC/MCTargetDesc/PPCMCTargetDesc.cpp | 9 +- lib/Target/PowerPC/MCTargetDesc/PPCMCTargetDesc.h | 7 + lib/Target/PowerPC/MCTargetDesc/PPCPredicates.cpp | 2 +- lib/Target/PowerPC/MCTargetDesc/PPCPredicates.h | 8 +- lib/Target/PowerPC/PPC.h | 26 +- lib/Target/PowerPC/PPC.td | 149 +- lib/Target/PowerPC/PPCAsmPrinter.cpp | 525 +- lib/Target/PowerPC/PPCBranchSelector.cpp | 17 +- lib/Target/PowerPC/PPCCTRLoops.cpp | 103 +- lib/Target/PowerPC/PPCCallingConv.td | 68 +- lib/Target/PowerPC/PPCCodeEmitter.cpp | 20 +- lib/Target/PowerPC/PPCFrameLowering.cpp | 250 +- lib/Target/PowerPC/PPCFrameLowering.h | 21 +- lib/Target/PowerPC/PPCHazardRecognizers.cpp | 2 +- lib/Target/PowerPC/PPCISelDAGToDAG.cpp | 333 +- lib/Target/PowerPC/PPCISelLowering.cpp | 1260 +++- lib/Target/PowerPC/PPCISelLowering.h | 191 +- lib/Target/PowerPC/PPCInstr64Bit.td | 680 ++- lib/Target/PowerPC/PPCInstrAltivec.td | 635 +- lib/Target/PowerPC/PPCInstrFormats.td | 22 +- lib/Target/PowerPC/PPCInstrInfo.cpp | 268 +- lib/Target/PowerPC/PPCInstrInfo.h | 6 +- lib/Target/PowerPC/PPCInstrInfo.td | 932 +-- lib/Target/PowerPC/PPCJITInfo.cpp | 19 +- lib/Target/PowerPC/PPCJITInfo.h | 2 +- lib/Target/PowerPC/PPCMCInstLower.cpp | 9 +- lib/Target/PowerPC/PPCMachineFunctionInfo.h | 31 +- lib/Target/PowerPC/PPCRegisterInfo.cpp | 414 +- lib/Target/PowerPC/PPCRegisterInfo.h | 48 +- lib/Target/PowerPC/PPCRegisterInfo.td | 176 +- lib/Target/PowerPC/PPCScheduleA2.td | 15 + lib/Target/PowerPC/PPCScheduleG5.td | 15 + lib/Target/PowerPC/PPCSubtarget.cpp | 23 +- lib/Target/PowerPC/PPCSubtarget.h | 30 +- lib/Target/PowerPC/PPCTargetMachine.cpp | 18 +- lib/Target/PowerPC/PPCTargetMachine.h | 20 +- lib/Target/PowerPC/PPCTargetTransformInfo.cpp | 240 + lib/Target/PowerPC/README.txt | 21 - .../PowerPC/TargetInfo/PowerPCTargetInfo.cpp | 2 +- lib/Target/R600/AMDGPU.h | 51 + lib/Target/R600/AMDGPU.td | 41 + lib/Target/R600/AMDGPUAsmPrinter.cpp | 145 + lib/Target/R600/AMDGPUAsmPrinter.h | 44 + lib/Target/R600/AMDGPUCallingConv.td | 42 + lib/Target/R600/AMDGPUConvertToISA.cpp | 62 + lib/Target/R600/AMDGPUFrameLowering.cpp | 122 + lib/Target/R600/AMDGPUFrameLowering.h | 44 + lib/Target/R600/AMDGPUISelLowering.cpp | 414 ++ lib/Target/R600/AMDGPUISelLowering.h | 140 + lib/Target/R600/AMDGPUIndirectAddressing.cpp | 343 ++ lib/Target/R600/AMDGPUInstrInfo.cpp | 267 + lib/Target/R600/AMDGPUInstrInfo.h | 206 + lib/Target/R600/AMDGPUInstrInfo.td | 82 + lib/Target/R600/AMDGPUInstructions.td | 266 + lib/Target/R600/AMDGPUIntrinsics.td | 60 + lib/Target/R600/AMDGPUMCInstLower.cpp | 83 + 
lib/Target/R600/AMDGPUMCInstLower.h | 34 + lib/Target/R600/AMDGPUMachineFunction.cpp | 22 + lib/Target/R600/AMDGPUMachineFunction.h | 29 + lib/Target/R600/AMDGPURegisterInfo.cpp | 75 + lib/Target/R600/AMDGPURegisterInfo.h | 66 + lib/Target/R600/AMDGPURegisterInfo.td | 25 + lib/Target/R600/AMDGPUStructurizeCFG.cpp | 896 +++ lib/Target/R600/AMDGPUSubtarget.cpp | 87 + lib/Target/R600/AMDGPUSubtarget.h | 65 + lib/Target/R600/AMDGPUTargetMachine.cpp | 164 + lib/Target/R600/AMDGPUTargetMachine.h | 70 + lib/Target/R600/AMDIL.h | 121 + lib/Target/R600/AMDIL7XXDevice.cpp | 115 + lib/Target/R600/AMDIL7XXDevice.h | 72 + lib/Target/R600/AMDILBase.td | 85 + lib/Target/R600/AMDILCFGStructurizer.cpp | 3051 ++++++++++ lib/Target/R600/AMDILDevice.cpp | 132 + lib/Target/R600/AMDILDevice.h | 117 + lib/Target/R600/AMDILDeviceInfo.cpp | 94 + lib/Target/R600/AMDILDeviceInfo.h | 88 + lib/Target/R600/AMDILDevices.h | 19 + lib/Target/R600/AMDILEvergreenDevice.cpp | 169 + lib/Target/R600/AMDILEvergreenDevice.h | 93 + lib/Target/R600/AMDILISelDAGToDAG.cpp | 643 ++ lib/Target/R600/AMDILISelLowering.cpp | 647 ++ lib/Target/R600/AMDILInstrInfo.td | 207 + lib/Target/R600/AMDILIntrinsicInfo.cpp | 79 + lib/Target/R600/AMDILIntrinsicInfo.h | 49 + lib/Target/R600/AMDILIntrinsics.td | 232 + lib/Target/R600/AMDILNIDevice.cpp | 65 + lib/Target/R600/AMDILNIDevice.h | 57 + lib/Target/R600/AMDILPeepholeOptimizer.cpp | 1215 ++++ lib/Target/R600/AMDILRegisterInfo.td | 107 + lib/Target/R600/AMDILSIDevice.cpp | 48 + lib/Target/R600/AMDILSIDevice.h | 39 + lib/Target/R600/CMakeLists.txt | 59 + lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp | 172 + lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h | 54 + lib/Target/R600/InstPrinter/CMakeLists.txt | 7 + lib/Target/R600/InstPrinter/LLVMBuild.txt | 24 + lib/Target/R600/InstPrinter/Makefile | 15 + lib/Target/R600/LLVMBuild.txt | 32 + lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp | 90 + lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp | 83 + lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h | 30 + lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h | 40 + .../R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp | 113 + lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h | 55 + lib/Target/R600/MCTargetDesc/CMakeLists.txt | 10 + lib/Target/R600/MCTargetDesc/LLVMBuild.txt | 23 + lib/Target/R600/MCTargetDesc/Makefile | 16 + lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp | 585 ++ lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp | 201 + lib/Target/R600/Makefile | 23 + lib/Target/R600/Processors.td | 30 + lib/Target/R600/R600ControlFlowFinalizer.cpp | 268 + lib/Target/R600/R600Defines.h | 97 + lib/Target/R600/R600EmitClauseMarkers.cpp | 255 + lib/Target/R600/R600ExpandSpecialInstrs.cpp | 297 + lib/Target/R600/R600ISelLowering.cpp | 1106 ++++ lib/Target/R600/R600ISelLowering.h | 74 + lib/Target/R600/R600InstrInfo.cpp | 841 +++ lib/Target/R600/R600InstrInfo.h | 204 + lib/Target/R600/R600Instructions.td | 2267 +++++++ lib/Target/R600/R600Intrinsics.td | 31 + lib/Target/R600/R600MachineFunctionInfo.cpp | 18 + lib/Target/R600/R600MachineFunctionInfo.h | 32 + lib/Target/R600/R600MachineScheduler.cpp | 427 ++ lib/Target/R600/R600MachineScheduler.h | 120 + lib/Target/R600/R600RegisterInfo.cpp | 99 + lib/Target/R600/R600RegisterInfo.h | 55 + lib/Target/R600/R600RegisterInfo.td | 209 + lib/Target/R600/R600Schedule.td | 36 + lib/Target/R600/SIAnnotateControlFlow.cpp | 329 + lib/Target/R600/SIISelLowering.cpp | 670 +++ lib/Target/R600/SIISelLowering.h | 58 + lib/Target/R600/SIInsertWaits.cpp | 358 ++ 
lib/Target/R600/SIInstrFormats.td | 426 ++ lib/Target/R600/SIInstrInfo.cpp | 264 + lib/Target/R600/SIInstrInfo.h | 97 + lib/Target/R600/SIInstrInfo.td | 356 ++ lib/Target/R600/SIInstructions.td | 1607 +++++ lib/Target/R600/SIIntrinsics.td | 42 + lib/Target/R600/SILowerControlFlow.cpp | 501 ++ lib/Target/R600/SIMachineFunctionInfo.cpp | 18 + lib/Target/R600/SIMachineFunctionInfo.h | 33 + lib/Target/R600/SIRegisterInfo.cpp | 53 + lib/Target/R600/SIRegisterInfo.h | 50 + lib/Target/R600/SIRegisterInfo.td | 182 + lib/Target/R600/SISchedule.td | 15 + lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp | 26 + lib/Target/R600/TargetInfo/CMakeLists.txt | 7 + lib/Target/R600/TargetInfo/LLVMBuild.txt | 23 + lib/Target/R600/TargetInfo/Makefile | 15 + lib/Target/README.txt | 15 - lib/Target/Sparc/DelaySlotFiller.cpp | 6 +- lib/Target/Sparc/FPMover.cpp | 6 +- lib/Target/Sparc/MCTargetDesc/SparcMCAsmInfo.cpp | 5 +- lib/Target/Sparc/SparcAsmPrinter.cpp | 4 +- lib/Target/Sparc/SparcCallingConv.td | 20 + lib/Target/Sparc/SparcFrameLowering.cpp | 22 +- lib/Target/Sparc/SparcFrameLowering.h | 4 + lib/Target/Sparc/SparcISelDAGToDAG.cpp | 2 +- lib/Target/Sparc/SparcISelLowering.cpp | 164 +- lib/Target/Sparc/SparcISelLowering.h | 19 +- lib/Target/Sparc/SparcInstr64Bit.td | 285 + lib/Target/Sparc/SparcInstrFormats.td | 37 + lib/Target/Sparc/SparcInstrInfo.cpp | 4 +- lib/Target/Sparc/SparcInstrInfo.td | 199 +- lib/Target/Sparc/SparcRegisterInfo.cpp | 46 +- lib/Target/Sparc/SparcRegisterInfo.h | 11 +- lib/Target/Sparc/SparcRegisterInfo.td | 14 +- lib/Target/Sparc/SparcTargetMachine.cpp | 4 +- lib/Target/Sparc/SparcTargetMachine.h | 17 +- lib/Target/Sparc/TargetInfo/SparcTargetInfo.cpp | 2 +- lib/Target/Target.cpp | 5 +- lib/Target/TargetInstrInfo.cpp | 88 - lib/Target/TargetIntrinsicInfo.cpp | 2 +- lib/Target/TargetLibraryInfo.cpp | 283 +- lib/Target/TargetLoweringObjectFile.cpp | 40 +- lib/Target/TargetMachine.cpp | 34 +- lib/Target/TargetMachineC.cpp | 16 +- lib/Target/TargetRegisterInfo.cpp | 248 - lib/Target/TargetSubtargetInfo.cpp | 4 + lib/Target/TargetTransformImpl.cpp | 353 -- lib/Target/X86/AsmParser/CMakeLists.txt | 1 - lib/Target/X86/AsmParser/X86AsmLexer.cpp | 159 - lib/Target/X86/AsmParser/X86AsmParser.cpp | 1129 ++-- lib/Target/X86/CMakeLists.txt | 3 +- lib/Target/X86/Disassembler/X86Disassembler.cpp | 9 +- lib/Target/X86/Disassembler/X86Disassembler.h | 4 - .../X86/Disassembler/X86DisassemblerDecoder.c | 347 +- lib/Target/X86/InstPrinter/X86ATTInstPrinter.cpp | 12 +- lib/Target/X86/InstPrinter/X86InstComments.cpp | 135 +- lib/Target/X86/InstPrinter/X86IntelInstPrinter.cpp | 4 +- lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp | 34 +- lib/Target/X86/MCTargetDesc/X86BaseInfo.h | 27 +- lib/Target/X86/MCTargetDesc/X86MCAsmInfo.cpp | 14 +- lib/Target/X86/MCTargetDesc/X86MCCodeEmitter.cpp | 45 +- lib/Target/X86/MCTargetDesc/X86MCTargetDesc.cpp | 11 +- .../X86/MCTargetDesc/X86MachObjectWriter.cpp | 8 +- lib/Target/X86/Makefile | 3 +- lib/Target/X86/README-SSE.txt | 9 + lib/Target/X86/README.txt | 37 - lib/Target/X86/TargetInfo/X86TargetInfo.cpp | 2 +- lib/Target/X86/Utils/X86ShuffleDecode.cpp | 18 + lib/Target/X86/Utils/X86ShuffleDecode.h | 2 + lib/Target/X86/X86.h | 11 +- lib/Target/X86/X86.td | 101 +- lib/Target/X86/X86AsmPrinter.cpp | 73 +- lib/Target/X86/X86AsmPrinter.h | 8 +- lib/Target/X86/X86COFFMachineModuleInfo.h | 2 +- lib/Target/X86/X86CallingConv.td | 102 +- lib/Target/X86/X86CodeEmitter.cpp | 36 +- lib/Target/X86/X86CompilationCallback_Win64.asm | 4 +- lib/Target/X86/X86FastISel.cpp | 169 +- 
lib/Target/X86/X86FloatingPoint.cpp | 12 +- lib/Target/X86/X86FrameLowering.cpp | 342 +- lib/Target/X86/X86FrameLowering.h | 6 + lib/Target/X86/X86ISelDAGToDAG.cpp | 38 +- lib/Target/X86/X86ISelLowering.cpp | 3418 ++++++----- lib/Target/X86/X86ISelLowering.h | 116 +- lib/Target/X86/X86Instr3DNow.td | 15 +- lib/Target/X86/X86InstrArithmetic.td | 577 +- lib/Target/X86/X86InstrCMovSetCC.td | 21 +- lib/Target/X86/X86InstrCompiler.td | 331 +- lib/Target/X86/X86InstrControl.td | 72 +- lib/Target/X86/X86InstrExtension.td | 73 +- lib/Target/X86/X86InstrFMA.td | 74 +- lib/Target/X86/X86InstrFPStack.td | 26 +- lib/Target/X86/X86InstrFormats.td | 163 +- lib/Target/X86/X86InstrFragmentsSIMD.td | 12 +- lib/Target/X86/X86InstrInfo.cpp | 746 +-- lib/Target/X86/X86InstrInfo.td | 336 +- lib/Target/X86/X86InstrMMX.td | 80 +- lib/Target/X86/X86InstrSSE.td | 2554 ++++---- lib/Target/X86/X86InstrShiftRotate.td | 58 +- lib/Target/X86/X86InstrSystem.td | 32 +- lib/Target/X86/X86InstrTSX.td | 9 +- lib/Target/X86/X86JITInfo.cpp | 36 +- lib/Target/X86/X86JITInfo.h | 2 +- lib/Target/X86/X86MCInstLower.cpp | 120 +- lib/Target/X86/X86PadShortFunction.cpp | 212 + lib/Target/X86/X86RegisterInfo.cpp | 296 +- lib/Target/X86/X86RegisterInfo.h | 7 +- lib/Target/X86/X86SchedHaswell.td | 126 + lib/Target/X86/X86SchedSandyBridge.td | 122 + lib/Target/X86/X86Schedule.td | 94 +- lib/Target/X86/X86ScheduleAtom.td | 2 +- lib/Target/X86/X86SelectionDAGInfo.cpp | 10 +- lib/Target/X86/X86Subtarget.cpp | 157 +- lib/Target/X86/X86Subtarget.h | 61 +- lib/Target/X86/X86TargetMachine.cpp | 58 +- lib/Target/X86/X86TargetMachine.h | 32 +- lib/Target/X86/X86TargetObjectFile.cpp | 16 +- lib/Target/X86/X86TargetObjectFile.h | 8 +- lib/Target/X86/X86TargetTransformInfo.cpp | 495 ++ lib/Target/X86/X86VZeroUpper.cpp | 10 + lib/Target/XCore/CMakeLists.txt | 4 + lib/Target/XCore/Disassembler/CMakeLists.txt | 5 + lib/Target/XCore/Disassembler/LLVMBuild.txt | 23 + lib/Target/XCore/Disassembler/Makefile | 16 + .../XCore/Disassembler/XCoreDisassembler.cpp | 800 +++ lib/Target/XCore/InstPrinter/CMakeLists.txt | 7 + lib/Target/XCore/InstPrinter/LLVMBuild.txt | 23 + lib/Target/XCore/InstPrinter/Makefile | 16 + lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp | 97 + lib/Target/XCore/InstPrinter/XCoreInstPrinter.h | 44 + lib/Target/XCore/LLVMBuild.txt | 3 +- lib/Target/XCore/MCTargetDesc/LLVMBuild.txt | 2 +- .../XCore/MCTargetDesc/XCoreMCTargetDesc.cpp | 14 + lib/Target/XCore/Makefile | 6 +- lib/Target/XCore/TargetInfo/XCoreTargetInfo.cpp | 2 +- lib/Target/XCore/XCore.td | 6 + lib/Target/XCore/XCoreAsmPrinter.cpp | 80 +- lib/Target/XCore/XCoreFrameLowering.cpp | 71 +- lib/Target/XCore/XCoreFrameLowering.h | 4 + lib/Target/XCore/XCoreISelDAGToDAG.cpp | 27 +- lib/Target/XCore/XCoreISelLowering.cpp | 162 +- lib/Target/XCore/XCoreISelLowering.h | 6 +- lib/Target/XCore/XCoreInstrFormats.td | 269 +- lib/Target/XCore/XCoreInstrInfo.cpp | 8 +- lib/Target/XCore/XCoreInstrInfo.td | 1108 ++-- lib/Target/XCore/XCoreMCInstLower.cpp | 117 + lib/Target/XCore/XCoreMCInstLower.h | 42 + lib/Target/XCore/XCoreMachineFunctionInfo.h | 2 +- lib/Target/XCore/XCoreRegisterInfo.cpp | 96 +- lib/Target/XCore/XCoreRegisterInfo.h | 7 +- lib/Target/XCore/XCoreRegisterInfo.td | 11 +- lib/Target/XCore/XCoreSubtarget.h | 2 +- lib/Target/XCore/XCoreTargetMachine.cpp | 6 +- lib/Target/XCore/XCoreTargetMachine.h | 15 +- lib/Target/XCore/XCoreTargetObjectFile.cpp | 2 +- lib/Transforms/CMakeLists.txt | 1 + lib/Transforms/Hello/Hello.cpp | 4 +- lib/Transforms/IPO/ArgumentPromotion.cpp | 92 +- 
lib/Transforms/IPO/ConstantMerge.cpp | 10 +- lib/Transforms/IPO/DeadArgumentElimination.cpp | 134 +- lib/Transforms/IPO/ExtractGV.cpp | 16 +- lib/Transforms/IPO/FunctionAttrs.cpp | 784 ++- lib/Transforms/IPO/GlobalDCE.cpp | 6 +- lib/Transforms/IPO/GlobalOpt.cpp | 246 +- lib/Transforms/IPO/IPConstantPropagation.cpp | 12 +- lib/Transforms/IPO/InlineAlways.cpp | 126 +- lib/Transforms/IPO/InlineSimple.cpp | 73 +- lib/Transforms/IPO/Inliner.cpp | 82 +- lib/Transforms/IPO/Internalize.cpp | 24 +- lib/Transforms/IPO/LLVMBuild.txt | 2 +- lib/Transforms/IPO/LoopExtractor.cpp | 8 +- lib/Transforms/IPO/MergeFunctions.cpp | 30 +- lib/Transforms/IPO/PartialInlining.cpp | 10 +- lib/Transforms/IPO/PassManagerBuilder.cpp | 28 +- lib/Transforms/IPO/PruneEH.cpp | 28 +- lib/Transforms/IPO/StripDeadPrototypes.cpp | 4 +- lib/Transforms/IPO/StripSymbols.cpp | 16 +- lib/Transforms/InstCombine/InstCombine.h | 71 +- lib/Transforms/InstCombine/InstCombineAddSub.cpp | 948 ++- lib/Transforms/InstCombine/InstCombineAndOrXor.cpp | 410 +- lib/Transforms/InstCombine/InstCombineCalls.cpp | 160 +- lib/Transforms/InstCombine/InstCombineCasts.cpp | 388 +- lib/Transforms/InstCombine/InstCombineCompares.cpp | 137 +- .../InstCombine/InstCombineLoadStoreAlloca.cpp | 39 +- .../InstCombine/InstCombineMulDivRem.cpp | 421 +- lib/Transforms/InstCombine/InstCombinePHI.cpp | 6 +- lib/Transforms/InstCombine/InstCombineSelect.cpp | 9 +- lib/Transforms/InstCombine/InstCombineShifts.cpp | 173 +- .../InstCombine/InstCombineSimplifyDemanded.cpp | 354 +- .../InstCombine/InstCombineVectorOps.cpp | 23 +- lib/Transforms/InstCombine/InstCombineWorklist.h | 38 +- .../InstCombine/InstructionCombining.cpp | 134 +- .../Instrumentation/AddressSanitizer.cpp | 1036 ++-- lib/Transforms/Instrumentation/BlackList.cpp | 58 +- lib/Transforms/Instrumentation/BlackList.h | 57 - lib/Transforms/Instrumentation/BoundsChecking.cpp | 19 +- lib/Transforms/Instrumentation/CMakeLists.txt | 1 + lib/Transforms/Instrumentation/EdgeProfiling.cpp | 6 +- lib/Transforms/Instrumentation/GCOVProfiling.cpp | 299 +- lib/Transforms/Instrumentation/Instrumentation.cpp | 2 + .../Instrumentation/MaximumSpanningTree.h | 4 +- lib/Transforms/Instrumentation/MemorySanitizer.cpp | 1985 ++++++ .../Instrumentation/OptimalEdgeProfiling.cpp | 16 +- lib/Transforms/Instrumentation/PathProfiling.cpp | 19 +- lib/Transforms/Instrumentation/ProfilingUtils.cpp | 10 +- lib/Transforms/Instrumentation/ThreadSanitizer.cpp | 160 +- lib/Transforms/LLVMBuild.txt | 2 +- lib/Transforms/Makefile | 2 +- lib/Transforms/ObjCARC/CMakeLists.txt | 13 + lib/Transforms/ObjCARC/DependencyAnalysis.cpp | 262 + lib/Transforms/ObjCARC/DependencyAnalysis.h | 79 + lib/Transforms/ObjCARC/LLVMBuild.txt | 23 + lib/Transforms/ObjCARC/Makefile | 15 + lib/Transforms/ObjCARC/ObjCARC.cpp | 48 + lib/Transforms/ObjCARC/ObjCARC.h | 395 ++ lib/Transforms/ObjCARC/ObjCARCAPElim.cpp | 175 + lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp | 162 + lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h | 74 + lib/Transforms/ObjCARC/ObjCARCContract.cpp | 541 ++ lib/Transforms/ObjCARC/ObjCARCExpand.cpp | 128 + lib/Transforms/ObjCARC/ObjCARCOpts.cpp | 3026 ++++++++++ lib/Transforms/ObjCARC/ObjCARCUtil.cpp | 252 + lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp | 177 + lib/Transforms/ObjCARC/ProvenanceAnalysis.h | 80 + lib/Transforms/Scalar/ADCE.cpp | 12 +- lib/Transforms/Scalar/BasicBlockPlacement.cpp | 6 +- lib/Transforms/Scalar/CMakeLists.txt | 1 - lib/Transforms/Scalar/CodeGenPrepare.cpp | 698 ++- lib/Transforms/Scalar/ConstantProp.cpp | 10 +- 
.../Scalar/CorrelatedValuePropagation.cpp | 39 +- lib/Transforms/Scalar/DCE.cpp | 6 +- lib/Transforms/Scalar/DeadStoreElimination.cpp | 28 +- lib/Transforms/Scalar/EarlyCSE.cpp | 16 +- lib/Transforms/Scalar/GVN.cpp | 79 +- lib/Transforms/Scalar/GlobalMerge.cpp | 137 +- lib/Transforms/Scalar/IndVarSimplify.cpp | 65 +- lib/Transforms/Scalar/JumpThreading.cpp | 47 +- lib/Transforms/Scalar/LICM.cpp | 65 +- lib/Transforms/Scalar/LoopDeletion.cpp | 60 +- lib/Transforms/Scalar/LoopIdiomRecognize.cpp | 543 +- lib/Transforms/Scalar/LoopInstSimplify.cpp | 9 +- lib/Transforms/Scalar/LoopRotation.cpp | 29 +- lib/Transforms/Scalar/LoopStrengthReduce.cpp | 647 +- lib/Transforms/Scalar/LoopUnrollPass.cpp | 31 +- lib/Transforms/Scalar/LoopUnswitch.cpp | 43 +- lib/Transforms/Scalar/LowerAtomic.cpp | 6 +- lib/Transforms/Scalar/MemCpyOptimizer.cpp | 10 +- lib/Transforms/Scalar/ObjCARC.cpp | 4232 ------------- lib/Transforms/Scalar/Reassociate.cpp | 358 +- lib/Transforms/Scalar/Reg2Mem.cpp | 14 +- lib/Transforms/Scalar/SCCP.cpp | 40 +- lib/Transforms/Scalar/SROA.cpp | 1491 ++--- lib/Transforms/Scalar/Scalar.cpp | 15 +- lib/Transforms/Scalar/ScalarReplAggregates.cpp | 28 +- lib/Transforms/Scalar/SimplifyCFGPass.cpp | 53 +- lib/Transforms/Scalar/SimplifyLibCalls.cpp | 1481 +---- lib/Transforms/Scalar/Sink.cpp | 6 +- lib/Transforms/Scalar/TailRecursionElimination.cpp | 44 +- lib/Transforms/Utils/AddrModeMatcher.cpp | 577 -- lib/Transforms/Utils/BasicBlockUtils.cpp | 92 +- lib/Transforms/Utils/BreakCriticalEdges.cpp | 12 +- lib/Transforms/Utils/BuildLibCalls.cpp | 174 +- lib/Transforms/Utils/BypassSlowDivision.cpp | 12 +- lib/Transforms/Utils/CMakeLists.txt | 1 - lib/Transforms/Utils/CloneFunction.cpp | 45 +- lib/Transforms/Utils/CloneModule.cpp | 10 +- lib/Transforms/Utils/CmpInstAnalysis.cpp | 4 +- lib/Transforms/Utils/CodeExtractor.cpp | 21 +- lib/Transforms/Utils/DemoteRegToStack.cpp | 33 +- lib/Transforms/Utils/InlineFunction.cpp | 85 +- lib/Transforms/Utils/InstructionNamer.cpp | 4 +- lib/Transforms/Utils/IntegerDivision.cpp | 112 +- lib/Transforms/Utils/LCSSA.cpp | 14 +- lib/Transforms/Utils/Local.cpp | 101 +- lib/Transforms/Utils/LoopSimplify.cpp | 24 +- lib/Transforms/Utils/LoopUnroll.cpp | 2 +- lib/Transforms/Utils/LoopUnrollRuntime.cpp | 2 +- lib/Transforms/Utils/LowerExpectIntrinsic.cpp | 20 +- lib/Transforms/Utils/LowerInvoke.cpp | 18 +- lib/Transforms/Utils/LowerSwitch.cpp | 12 +- lib/Transforms/Utils/Mem2Reg.cpp | 8 +- lib/Transforms/Utils/MetaRenamer.cpp | 35 +- lib/Transforms/Utils/ModuleUtils.cpp | 8 +- lib/Transforms/Utils/PromoteMemoryToRegister.cpp | 28 +- lib/Transforms/Utils/SSAUpdater.cpp | 8 +- lib/Transforms/Utils/SimplifyCFG.cpp | 416 +- lib/Transforms/Utils/SimplifyIndVar.cpp | 10 +- lib/Transforms/Utils/SimplifyInstructions.cpp | 10 +- lib/Transforms/Utils/SimplifyLibCalls.cpp | 988 ++- lib/Transforms/Utils/UnifyFunctionExitNodes.cpp | 10 +- lib/Transforms/Utils/ValueMapper.cpp | 29 +- lib/Transforms/Vectorize/BBVectorize.cpp | 1116 ++-- lib/Transforms/Vectorize/LoopVectorize.cpp | 3551 ++++++++--- lib/Transforms/Vectorize/Vectorize.cpp | 10 +- lib/VMCore/AsmWriter.cpp | 2160 ------- lib/VMCore/Attributes.cpp | 547 -- lib/VMCore/AttributesImpl.h | 71 - lib/VMCore/AutoUpgrade.cpp | 393 -- lib/VMCore/BasicBlock.cpp | 371 -- lib/VMCore/CMakeLists.txt | 52 - lib/VMCore/ConstantFold.cpp | 2066 ------- lib/VMCore/ConstantFold.h | 56 - lib/VMCore/Constants.cpp | 2671 --------- lib/VMCore/ConstantsContext.h | 774 --- lib/VMCore/Core.cpp | 2410 -------- lib/VMCore/DIBuilder.cpp | 1045 ---- 
lib/VMCore/DataLayout.cpp | 749 --- lib/VMCore/DebugInfo.cpp | 1178 ---- lib/VMCore/DebugLoc.cpp | 315 - lib/VMCore/Dominators.cpp | 302 - lib/VMCore/Function.cpp | 668 --- lib/VMCore/GCOV.cpp | 283 - lib/VMCore/GVMaterializer.cpp | 18 - lib/VMCore/Globals.cpp | 263 - lib/VMCore/IRBuilder.cpp | 153 - lib/VMCore/InlineAsm.cpp | 295 - lib/VMCore/Instruction.cpp | 447 -- lib/VMCore/Instructions.cpp | 3544 ----------- lib/VMCore/IntrinsicInst.cpp | 73 - lib/VMCore/LLVMBuild.txt | 22 - lib/VMCore/LLVMContext.cpp | 162 - lib/VMCore/LLVMContextImpl.cpp | 149 - lib/VMCore/LLVMContextImpl.h | 369 -- lib/VMCore/LeakDetector.cpp | 69 - lib/VMCore/LeaksContext.h | 92 - lib/VMCore/Makefile | 33 - lib/VMCore/Metadata.cpp | 744 --- lib/VMCore/Module.cpp | 469 -- lib/VMCore/Pass.cpp | 297 - lib/VMCore/PassManager.cpp | 1861 ------ lib/VMCore/PassRegistry.cpp | 209 - lib/VMCore/PrintModulePass.cpp | 101 - lib/VMCore/SymbolTableListTraitsImpl.h | 118 - lib/VMCore/TargetTransformInfo.cpp | 31 - lib/VMCore/Type.cpp | 762 --- lib/VMCore/TypeFinder.cpp | 148 - lib/VMCore/Use.cpp | 145 - lib/VMCore/User.cpp | 90 - lib/VMCore/Value.cpp | 694 --- lib/VMCore/ValueSymbolTable.cpp | 117 - lib/VMCore/ValueTypes.cpp | 248 - lib/VMCore/Verifier.cpp | 1997 ------ 1389 files changed, 167072 insertions(+), 104312 deletions(-) delete mode 100644 lib/Analysis/DbgInfoPrinter.cpp create mode 100644 lib/Analysis/IPA/CallPrinter.cpp create mode 100644 lib/Analysis/IPA/InlineCost.cpp delete mode 100644 lib/Analysis/InlineCost.cpp create mode 100644 lib/Analysis/PtrUseVisitor.cpp create mode 100644 lib/Analysis/TargetTransformInfo.cpp create mode 100644 lib/Bitcode/Reader/BitstreamReader.cpp create mode 100644 lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp create mode 100644 lib/CodeGen/BasicTargetTransformInfo.cpp delete mode 100644 lib/CodeGen/CodePlacementOpt.cpp create mode 100644 lib/CodeGen/ErlangGC.cpp delete mode 100644 lib/CodeGen/LiveIntervalUnion.h delete mode 100644 lib/CodeGen/LiveRegMatrix.h delete mode 100644 lib/CodeGen/MachineLoopRanges.cpp delete mode 100644 lib/CodeGen/MachineTraceMetrics.h create mode 100644 lib/CodeGen/TargetInstrInfo.cpp delete mode 100644 lib/CodeGen/TargetInstrInfoImpl.cpp create mode 100644 lib/CodeGen/TargetLoweringBase.cpp create mode 100644 lib/CodeGen/TargetRegisterInfo.cpp delete mode 100644 lib/CodeGen/VirtRegMap.h create mode 100644 lib/DebugInfo/DWARFDebugFrame.cpp create mode 100644 lib/DebugInfo/DWARFDebugFrame.h create mode 100644 lib/DebugInfo/DWARFRelocMap.h create mode 100644 lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp create mode 100644 lib/IR/AsmWriter.cpp create mode 100644 lib/IR/AttributeImpl.h create mode 100644 lib/IR/Attributes.cpp create mode 100644 lib/IR/AutoUpgrade.cpp create mode 100644 lib/IR/BasicBlock.cpp create mode 100644 lib/IR/CMakeLists.txt create mode 100644 lib/IR/ConstantFold.cpp create mode 100644 lib/IR/ConstantFold.h create mode 100644 lib/IR/Constants.cpp create mode 100644 lib/IR/ConstantsContext.h create mode 100644 lib/IR/Core.cpp create mode 100644 lib/IR/DIBuilder.cpp create mode 100644 lib/IR/DataLayout.cpp create mode 100644 lib/IR/DebugInfo.cpp create mode 100644 lib/IR/DebugLoc.cpp create mode 100644 lib/IR/Dominators.cpp create mode 100644 lib/IR/Function.cpp create mode 100644 lib/IR/GCOV.cpp create mode 100644 lib/IR/GVMaterializer.cpp create mode 100644 lib/IR/Globals.cpp create mode 100644 lib/IR/IRBuilder.cpp create mode 100644 lib/IR/InlineAsm.cpp create mode 100644 lib/IR/Instruction.cpp create mode 100644 
lib/IR/Instructions.cpp create mode 100644 lib/IR/IntrinsicInst.cpp create mode 100644 lib/IR/LLVMBuild.txt create mode 100644 lib/IR/LLVMContext.cpp create mode 100644 lib/IR/LLVMContextImpl.cpp create mode 100644 lib/IR/LLVMContextImpl.h create mode 100644 lib/IR/LeakDetector.cpp create mode 100644 lib/IR/LeaksContext.h create mode 100644 lib/IR/Makefile create mode 100644 lib/IR/Metadata.cpp create mode 100644 lib/IR/Module.cpp create mode 100644 lib/IR/Pass.cpp create mode 100644 lib/IR/PassManager.cpp create mode 100644 lib/IR/PassRegistry.cpp create mode 100644 lib/IR/PrintModulePass.cpp create mode 100644 lib/IR/SymbolTableListTraitsImpl.h create mode 100644 lib/IR/Type.cpp create mode 100644 lib/IR/TypeFinder.cpp create mode 100644 lib/IR/Use.cpp create mode 100644 lib/IR/User.cpp create mode 100644 lib/IR/Value.cpp create mode 100644 lib/IR/ValueSymbolTable.cpp create mode 100644 lib/IR/ValueTypes.cpp create mode 100644 lib/IR/Verifier.cpp create mode 100644 lib/IRReader/CMakeLists.txt create mode 100644 lib/IRReader/IRReader.cpp create mode 100644 lib/IRReader/LLVMBuild.txt create mode 100644 lib/IRReader/Makefile delete mode 100644 lib/Linker/LinkArchives.cpp delete mode 100644 lib/Linker/LinkItems.cpp delete mode 100644 lib/MC/MCDisassembler/EDDisassembler.cpp delete mode 100644 lib/MC/MCDisassembler/EDDisassembler.h delete mode 100644 lib/MC/MCDisassembler/EDInfo.h delete mode 100644 lib/MC/MCDisassembler/EDInst.cpp delete mode 100644 lib/MC/MCDisassembler/EDInst.h delete mode 100644 lib/MC/MCDisassembler/EDMain.cpp delete mode 100644 lib/MC/MCDisassembler/EDOperand.cpp delete mode 100644 lib/MC/MCDisassembler/EDOperand.h delete mode 100644 lib/MC/MCDisassembler/EDToken.cpp delete mode 100644 lib/MC/MCDisassembler/EDToken.h delete mode 100644 lib/MC/MCELF.h delete mode 100644 lib/MC/MCTargetAsmLexer.cpp create mode 100644 lib/Option/Arg.cpp create mode 100644 lib/Option/ArgList.cpp create mode 100644 lib/Option/CMakeLists.txt create mode 100644 lib/Option/LLVMBuild.txt create mode 100644 lib/Option/Makefile create mode 100644 lib/Option/OptTable.cpp create mode 100644 lib/Option/Option.cpp create mode 100644 lib/Support/ConvertUTF.c create mode 100644 lib/Support/ConvertUTFWrapper.cpp create mode 100644 lib/Support/Unix/Watchdog.inc create mode 100644 lib/Support/Watchdog.cpp create mode 100644 lib/Support/Windows/Watchdog.inc create mode 100644 lib/Support/YAMLTraits.cpp create mode 100644 lib/Target/AArch64/AArch64.h create mode 100644 lib/Target/AArch64/AArch64.td create mode 100644 lib/Target/AArch64/AArch64AsmPrinter.cpp create mode 100644 lib/Target/AArch64/AArch64AsmPrinter.h create mode 100644 lib/Target/AArch64/AArch64BranchFixupPass.cpp create mode 100644 lib/Target/AArch64/AArch64CallingConv.td create mode 100644 lib/Target/AArch64/AArch64FrameLowering.cpp create mode 100644 lib/Target/AArch64/AArch64FrameLowering.h create mode 100644 lib/Target/AArch64/AArch64ISelDAGToDAG.cpp create mode 100644 lib/Target/AArch64/AArch64ISelLowering.cpp create mode 100644 lib/Target/AArch64/AArch64ISelLowering.h create mode 100644 lib/Target/AArch64/AArch64InstrFormats.td create mode 100644 lib/Target/AArch64/AArch64InstrInfo.cpp create mode 100644 lib/Target/AArch64/AArch64InstrInfo.h create mode 100644 lib/Target/AArch64/AArch64InstrInfo.td create mode 100644 lib/Target/AArch64/AArch64MCInstLower.cpp create mode 100644 lib/Target/AArch64/AArch64MachineFunctionInfo.cpp create mode 100644 lib/Target/AArch64/AArch64MachineFunctionInfo.h create mode 100644 
lib/Target/AArch64/AArch64RegisterInfo.cpp create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.h create mode 100644 lib/Target/AArch64/AArch64RegisterInfo.td create mode 100644 lib/Target/AArch64/AArch64Schedule.td create mode 100644 lib/Target/AArch64/AArch64SelectionDAGInfo.cpp create mode 100644 lib/Target/AArch64/AArch64SelectionDAGInfo.h create mode 100644 lib/Target/AArch64/AArch64Subtarget.cpp create mode 100644 lib/Target/AArch64/AArch64Subtarget.h create mode 100644 lib/Target/AArch64/AArch64TargetMachine.cpp create mode 100644 lib/Target/AArch64/AArch64TargetMachine.h create mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.cpp create mode 100644 lib/Target/AArch64/AArch64TargetObjectFile.h create mode 100644 lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp create mode 100644 lib/Target/AArch64/AsmParser/CMakeLists.txt create mode 100644 lib/Target/AArch64/AsmParser/LLVMBuild.txt create mode 100644 lib/Target/AArch64/AsmParser/Makefile create mode 100644 lib/Target/AArch64/CMakeLists.txt create mode 100644 lib/Target/AArch64/Disassembler/AArch64Disassembler.cpp create mode 100644 lib/Target/AArch64/Disassembler/CMakeLists.txt create mode 100644 lib/Target/AArch64/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/AArch64/Disassembler/Makefile create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.cpp create mode 100644 lib/Target/AArch64/InstPrinter/AArch64InstPrinter.h create mode 100644 lib/Target/AArch64/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/AArch64/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/AArch64/InstPrinter/Makefile create mode 100644 lib/Target/AArch64/LLVMBuild.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64AsmBackend.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFObjectWriter.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64ELFStreamer.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64FixupKinds.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCAsmInfo.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCCodeEmitter.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCExpr.h create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.cpp create mode 100644 lib/Target/AArch64/MCTargetDesc/AArch64MCTargetDesc.h create mode 100644 lib/Target/AArch64/MCTargetDesc/CMakeLists.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/AArch64/MCTargetDesc/Makefile create mode 100644 lib/Target/AArch64/Makefile create mode 100644 lib/Target/AArch64/README.txt create mode 100644 lib/Target/AArch64/TargetInfo/AArch64TargetInfo.cpp create mode 100644 lib/Target/AArch64/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/AArch64/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/AArch64/TargetInfo/Makefile create mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.cpp create mode 100644 lib/Target/AArch64/Utils/AArch64BaseInfo.h create mode 100644 lib/Target/AArch64/Utils/CMakeLists.txt create mode 100644 lib/Target/AArch64/Utils/LLVMBuild.txt create mode 100644 lib/Target/AArch64/Utils/Makefile create mode 100644 lib/Target/ARM/A15SDOptimizer.cpp create mode 100644 lib/Target/ARM/ARMTargetTransformInfo.cpp delete mode 100644 
lib/Target/ARM/AsmParser/ARMAsmLexer.cpp create mode 100755 lib/Target/ARM/LICENSE.TXT create mode 100644 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp create mode 100644 lib/Target/ARM/MCTargetDesc/ARMELFStreamer.h create mode 100644 lib/Target/ARM/MCTargetDesc/ARMUnwindOp.h delete mode 100644 lib/Target/CellSPU/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/CellSDKIntrinsics.td delete mode 100644 lib/Target/CellSPU/LLVMBuild.txt delete mode 100644 lib/Target/CellSPU/MCTargetDesc/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/MCTargetDesc/LLVMBuild.txt delete mode 100644 lib/Target/CellSPU/MCTargetDesc/Makefile delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.cpp delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCAsmInfo.h delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.cpp delete mode 100644 lib/Target/CellSPU/MCTargetDesc/SPUMCTargetDesc.h delete mode 100644 lib/Target/CellSPU/Makefile delete mode 100644 lib/Target/CellSPU/README.txt delete mode 100644 lib/Target/CellSPU/SPU.h delete mode 100644 lib/Target/CellSPU/SPU.td delete mode 100644 lib/Target/CellSPU/SPU128InstrInfo.td delete mode 100644 lib/Target/CellSPU/SPU64InstrInfo.td delete mode 100644 lib/Target/CellSPU/SPUAsmPrinter.cpp delete mode 100644 lib/Target/CellSPU/SPUCallingConv.td delete mode 100644 lib/Target/CellSPU/SPUFrameLowering.cpp delete mode 100644 lib/Target/CellSPU/SPUFrameLowering.h delete mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.cpp delete mode 100644 lib/Target/CellSPU/SPUHazardRecognizers.h delete mode 100644 lib/Target/CellSPU/SPUISelDAGToDAG.cpp delete mode 100644 lib/Target/CellSPU/SPUISelLowering.cpp delete mode 100644 lib/Target/CellSPU/SPUISelLowering.h delete mode 100644 lib/Target/CellSPU/SPUInstrBuilder.h delete mode 100644 lib/Target/CellSPU/SPUInstrFormats.td delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.cpp delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.h delete mode 100644 lib/Target/CellSPU/SPUInstrInfo.td delete mode 100644 lib/Target/CellSPU/SPUMachineFunction.cpp delete mode 100644 lib/Target/CellSPU/SPUMachineFunction.h delete mode 100644 lib/Target/CellSPU/SPUMathInstr.td delete mode 100644 lib/Target/CellSPU/SPUNodes.td delete mode 100644 lib/Target/CellSPU/SPUNopFiller.cpp delete mode 100644 lib/Target/CellSPU/SPUOperands.td delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.cpp delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.h delete mode 100644 lib/Target/CellSPU/SPURegisterInfo.td delete mode 100644 lib/Target/CellSPU/SPURegisterNames.h delete mode 100644 lib/Target/CellSPU/SPUSchedule.td delete mode 100644 lib/Target/CellSPU/SPUSelectionDAGInfo.cpp delete mode 100644 lib/Target/CellSPU/SPUSelectionDAGInfo.h delete mode 100644 lib/Target/CellSPU/SPUSubtarget.cpp delete mode 100644 lib/Target/CellSPU/SPUSubtarget.h delete mode 100644 lib/Target/CellSPU/SPUTargetMachine.cpp delete mode 100644 lib/Target/CellSPU/SPUTargetMachine.h delete mode 100644 lib/Target/CellSPU/TargetInfo/CMakeLists.txt delete mode 100644 lib/Target/CellSPU/TargetInfo/CellSPUTargetInfo.cpp delete mode 100644 lib/Target/CellSPU/TargetInfo/LLVMBuild.txt delete mode 100644 lib/Target/CellSPU/TargetInfo/Makefile create mode 100644 lib/Target/Hexagon/HexagonFixupHwLoops.cpp delete mode 100644 lib/Target/Hexagon/HexagonImmediates.td delete mode 100644 lib/Target/Hexagon/HexagonMCInst.h create mode 100644 lib/Target/Hexagon/HexagonOperands.td create mode 100644 lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.cpp create mode 100644 
lib/Target/Hexagon/MCTargetDesc/HexagonMCInst.h delete mode 100644 lib/Target/MBlaze/AsmParser/MBlazeAsmLexer.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsELFStreamer.h create mode 100644 lib/Target/Mips/MCTargetDesc/MipsReginfo.cpp create mode 100644 lib/Target/Mips/MCTargetDesc/MipsReginfo.h create mode 100644 lib/Target/Mips/Mips16ISelDAGToDAG.cpp create mode 100644 lib/Target/Mips/Mips16ISelDAGToDAG.h create mode 100644 lib/Target/Mips/Mips16ISelLowering.cpp create mode 100644 lib/Target/Mips/Mips16ISelLowering.h create mode 100644 lib/Target/Mips/MipsConstantIslandPass.cpp create mode 100644 lib/Target/Mips/MipsISelDAGToDAG.h create mode 100644 lib/Target/Mips/MipsSEISelDAGToDAG.cpp create mode 100644 lib/Target/Mips/MipsSEISelDAGToDAG.h create mode 100644 lib/Target/Mips/MipsSEISelLowering.cpp create mode 100644 lib/Target/Mips/MipsSEISelLowering.h create mode 100644 lib/Target/NVPTX/NVVMReflect.cpp delete mode 100644 lib/Target/NVPTX/VectorElementize.cpp delete mode 100644 lib/Target/NVPTX/gen-register-defs.py delete mode 100644 lib/Target/PowerPC/MCTargetDesc/PPCBaseInfo.h create mode 100644 lib/Target/PowerPC/PPCTargetTransformInfo.cpp create mode 100644 lib/Target/R600/AMDGPU.h create mode 100644 lib/Target/R600/AMDGPU.td create mode 100644 lib/Target/R600/AMDGPUAsmPrinter.cpp create mode 100644 lib/Target/R600/AMDGPUAsmPrinter.h create mode 100644 lib/Target/R600/AMDGPUCallingConv.td create mode 100644 lib/Target/R600/AMDGPUConvertToISA.cpp create mode 100644 lib/Target/R600/AMDGPUFrameLowering.cpp create mode 100644 lib/Target/R600/AMDGPUFrameLowering.h create mode 100644 lib/Target/R600/AMDGPUISelLowering.cpp create mode 100644 lib/Target/R600/AMDGPUISelLowering.h create mode 100644 lib/Target/R600/AMDGPUIndirectAddressing.cpp create mode 100644 lib/Target/R600/AMDGPUInstrInfo.cpp create mode 100644 lib/Target/R600/AMDGPUInstrInfo.h create mode 100644 lib/Target/R600/AMDGPUInstrInfo.td create mode 100644 lib/Target/R600/AMDGPUInstructions.td create mode 100644 lib/Target/R600/AMDGPUIntrinsics.td create mode 100644 lib/Target/R600/AMDGPUMCInstLower.cpp create mode 100644 lib/Target/R600/AMDGPUMCInstLower.h create mode 100644 lib/Target/R600/AMDGPUMachineFunction.cpp create mode 100644 lib/Target/R600/AMDGPUMachineFunction.h create mode 100644 lib/Target/R600/AMDGPURegisterInfo.cpp create mode 100644 lib/Target/R600/AMDGPURegisterInfo.h create mode 100644 lib/Target/R600/AMDGPURegisterInfo.td create mode 100644 lib/Target/R600/AMDGPUStructurizeCFG.cpp create mode 100644 lib/Target/R600/AMDGPUSubtarget.cpp create mode 100644 lib/Target/R600/AMDGPUSubtarget.h create mode 100644 lib/Target/R600/AMDGPUTargetMachine.cpp create mode 100644 lib/Target/R600/AMDGPUTargetMachine.h create mode 100644 lib/Target/R600/AMDIL.h create mode 100644 lib/Target/R600/AMDIL7XXDevice.cpp create mode 100644 lib/Target/R600/AMDIL7XXDevice.h create mode 100644 lib/Target/R600/AMDILBase.td create mode 100644 lib/Target/R600/AMDILCFGStructurizer.cpp create mode 100644 lib/Target/R600/AMDILDevice.cpp create mode 100644 lib/Target/R600/AMDILDevice.h create mode 100644 lib/Target/R600/AMDILDeviceInfo.cpp create mode 100644 lib/Target/R600/AMDILDeviceInfo.h create mode 100644 lib/Target/R600/AMDILDevices.h create mode 100644 lib/Target/R600/AMDILEvergreenDevice.cpp create mode 100644 lib/Target/R600/AMDILEvergreenDevice.h create mode 100644 lib/Target/R600/AMDILISelDAGToDAG.cpp create mode 100644 
lib/Target/R600/AMDILISelLowering.cpp create mode 100644 lib/Target/R600/AMDILInstrInfo.td create mode 100644 lib/Target/R600/AMDILIntrinsicInfo.cpp create mode 100644 lib/Target/R600/AMDILIntrinsicInfo.h create mode 100644 lib/Target/R600/AMDILIntrinsics.td create mode 100644 lib/Target/R600/AMDILNIDevice.cpp create mode 100644 lib/Target/R600/AMDILNIDevice.h create mode 100644 lib/Target/R600/AMDILPeepholeOptimizer.cpp create mode 100644 lib/Target/R600/AMDILRegisterInfo.td create mode 100644 lib/Target/R600/AMDILSIDevice.cpp create mode 100644 lib/Target/R600/AMDILSIDevice.h create mode 100644 lib/Target/R600/CMakeLists.txt create mode 100644 lib/Target/R600/InstPrinter/AMDGPUInstPrinter.cpp create mode 100644 lib/Target/R600/InstPrinter/AMDGPUInstPrinter.h create mode 100644 lib/Target/R600/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/R600/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/R600/InstPrinter/Makefile create mode 100644 lib/Target/R600/LLVMBuild.txt create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUAsmBackend.cpp create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.cpp create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCAsmInfo.h create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCCodeEmitter.h create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.cpp create mode 100644 lib/Target/R600/MCTargetDesc/AMDGPUMCTargetDesc.h create mode 100644 lib/Target/R600/MCTargetDesc/CMakeLists.txt create mode 100644 lib/Target/R600/MCTargetDesc/LLVMBuild.txt create mode 100644 lib/Target/R600/MCTargetDesc/Makefile create mode 100644 lib/Target/R600/MCTargetDesc/R600MCCodeEmitter.cpp create mode 100644 lib/Target/R600/MCTargetDesc/SIMCCodeEmitter.cpp create mode 100644 lib/Target/R600/Makefile create mode 100644 lib/Target/R600/Processors.td create mode 100644 lib/Target/R600/R600ControlFlowFinalizer.cpp create mode 100644 lib/Target/R600/R600Defines.h create mode 100644 lib/Target/R600/R600EmitClauseMarkers.cpp create mode 100644 lib/Target/R600/R600ExpandSpecialInstrs.cpp create mode 100644 lib/Target/R600/R600ISelLowering.cpp create mode 100644 lib/Target/R600/R600ISelLowering.h create mode 100644 lib/Target/R600/R600InstrInfo.cpp create mode 100644 lib/Target/R600/R600InstrInfo.h create mode 100644 lib/Target/R600/R600Instructions.td create mode 100644 lib/Target/R600/R600Intrinsics.td create mode 100644 lib/Target/R600/R600MachineFunctionInfo.cpp create mode 100644 lib/Target/R600/R600MachineFunctionInfo.h create mode 100644 lib/Target/R600/R600MachineScheduler.cpp create mode 100644 lib/Target/R600/R600MachineScheduler.h create mode 100644 lib/Target/R600/R600RegisterInfo.cpp create mode 100644 lib/Target/R600/R600RegisterInfo.h create mode 100644 lib/Target/R600/R600RegisterInfo.td create mode 100644 lib/Target/R600/R600Schedule.td create mode 100644 lib/Target/R600/SIAnnotateControlFlow.cpp create mode 100644 lib/Target/R600/SIISelLowering.cpp create mode 100644 lib/Target/R600/SIISelLowering.h create mode 100644 lib/Target/R600/SIInsertWaits.cpp create mode 100644 lib/Target/R600/SIInstrFormats.td create mode 100644 lib/Target/R600/SIInstrInfo.cpp create mode 100644 lib/Target/R600/SIInstrInfo.h create mode 100644 lib/Target/R600/SIInstrInfo.td create mode 100644 lib/Target/R600/SIInstructions.td create mode 100644 lib/Target/R600/SIIntrinsics.td create mode 100644 lib/Target/R600/SILowerControlFlow.cpp create mode 100644 lib/Target/R600/SIMachineFunctionInfo.cpp create mode 100644 lib/Target/R600/SIMachineFunctionInfo.h create 
mode 100644 lib/Target/R600/SIRegisterInfo.cpp create mode 100644 lib/Target/R600/SIRegisterInfo.h create mode 100644 lib/Target/R600/SIRegisterInfo.td create mode 100644 lib/Target/R600/SISchedule.td create mode 100644 lib/Target/R600/TargetInfo/AMDGPUTargetInfo.cpp create mode 100644 lib/Target/R600/TargetInfo/CMakeLists.txt create mode 100644 lib/Target/R600/TargetInfo/LLVMBuild.txt create mode 100644 lib/Target/R600/TargetInfo/Makefile create mode 100644 lib/Target/Sparc/SparcInstr64Bit.td delete mode 100644 lib/Target/TargetInstrInfo.cpp delete mode 100644 lib/Target/TargetRegisterInfo.cpp delete mode 100644 lib/Target/TargetTransformImpl.cpp delete mode 100644 lib/Target/X86/AsmParser/X86AsmLexer.cpp create mode 100644 lib/Target/X86/X86PadShortFunction.cpp create mode 100644 lib/Target/X86/X86SchedHaswell.td create mode 100644 lib/Target/X86/X86SchedSandyBridge.td create mode 100644 lib/Target/X86/X86TargetTransformInfo.cpp create mode 100644 lib/Target/XCore/Disassembler/CMakeLists.txt create mode 100644 lib/Target/XCore/Disassembler/LLVMBuild.txt create mode 100644 lib/Target/XCore/Disassembler/Makefile create mode 100644 lib/Target/XCore/Disassembler/XCoreDisassembler.cpp create mode 100644 lib/Target/XCore/InstPrinter/CMakeLists.txt create mode 100644 lib/Target/XCore/InstPrinter/LLVMBuild.txt create mode 100644 lib/Target/XCore/InstPrinter/Makefile create mode 100644 lib/Target/XCore/InstPrinter/XCoreInstPrinter.cpp create mode 100644 lib/Target/XCore/InstPrinter/XCoreInstPrinter.h create mode 100644 lib/Target/XCore/XCoreMCInstLower.cpp create mode 100644 lib/Target/XCore/XCoreMCInstLower.h delete mode 100644 lib/Transforms/Instrumentation/BlackList.h create mode 100644 lib/Transforms/Instrumentation/MemorySanitizer.cpp create mode 100644 lib/Transforms/ObjCARC/CMakeLists.txt create mode 100644 lib/Transforms/ObjCARC/DependencyAnalysis.cpp create mode 100644 lib/Transforms/ObjCARC/DependencyAnalysis.h create mode 100644 lib/Transforms/ObjCARC/LLVMBuild.txt create mode 100644 lib/Transforms/ObjCARC/Makefile create mode 100644 lib/Transforms/ObjCARC/ObjCARC.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARC.h create mode 100644 lib/Transforms/ObjCARC/ObjCARCAPElim.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCAliasAnalysis.h create mode 100644 lib/Transforms/ObjCARC/ObjCARCContract.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCExpand.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCOpts.cpp create mode 100644 lib/Transforms/ObjCARC/ObjCARCUtil.cpp create mode 100644 lib/Transforms/ObjCARC/ProvenanceAnalysis.cpp create mode 100644 lib/Transforms/ObjCARC/ProvenanceAnalysis.h delete mode 100644 lib/Transforms/Scalar/ObjCARC.cpp delete mode 100644 lib/Transforms/Utils/AddrModeMatcher.cpp delete mode 100644 lib/VMCore/AsmWriter.cpp delete mode 100644 lib/VMCore/Attributes.cpp delete mode 100644 lib/VMCore/AttributesImpl.h delete mode 100644 lib/VMCore/AutoUpgrade.cpp delete mode 100644 lib/VMCore/BasicBlock.cpp delete mode 100644 lib/VMCore/CMakeLists.txt delete mode 100644 lib/VMCore/ConstantFold.cpp delete mode 100644 lib/VMCore/ConstantFold.h delete mode 100644 lib/VMCore/Constants.cpp delete mode 100644 lib/VMCore/ConstantsContext.h delete mode 100644 lib/VMCore/Core.cpp delete mode 100644 lib/VMCore/DIBuilder.cpp delete mode 100644 lib/VMCore/DataLayout.cpp delete mode 100644 lib/VMCore/DebugInfo.cpp delete mode 100644 lib/VMCore/DebugLoc.cpp delete mode 100644 lib/VMCore/Dominators.cpp 
delete mode 100644 lib/VMCore/Function.cpp delete mode 100644 lib/VMCore/GCOV.cpp delete mode 100644 lib/VMCore/GVMaterializer.cpp delete mode 100644 lib/VMCore/Globals.cpp delete mode 100644 lib/VMCore/IRBuilder.cpp delete mode 100644 lib/VMCore/InlineAsm.cpp delete mode 100644 lib/VMCore/Instruction.cpp delete mode 100644 lib/VMCore/Instructions.cpp delete mode 100644 lib/VMCore/IntrinsicInst.cpp delete mode 100644 lib/VMCore/LLVMBuild.txt delete mode 100644 lib/VMCore/LLVMContext.cpp delete mode 100644 lib/VMCore/LLVMContextImpl.cpp delete mode 100644 lib/VMCore/LLVMContextImpl.h delete mode 100644 lib/VMCore/LeakDetector.cpp delete mode 100644 lib/VMCore/LeaksContext.h delete mode 100644 lib/VMCore/Makefile delete mode 100644 lib/VMCore/Metadata.cpp delete mode 100644 lib/VMCore/Module.cpp delete mode 100644 lib/VMCore/Pass.cpp delete mode 100644 lib/VMCore/PassManager.cpp delete mode 100644 lib/VMCore/PassRegistry.cpp delete mode 100644 lib/VMCore/PrintModulePass.cpp delete mode 100644 lib/VMCore/SymbolTableListTraitsImpl.h delete mode 100644 lib/VMCore/TargetTransformInfo.cpp delete mode 100644 lib/VMCore/Type.cpp delete mode 100644 lib/VMCore/TypeFinder.cpp delete mode 100644 lib/VMCore/Use.cpp delete mode 100644 lib/VMCore/User.cpp delete mode 100644 lib/VMCore/Value.cpp delete mode 100644 lib/VMCore/ValueSymbolTable.cpp delete mode 100644 lib/VMCore/ValueTypes.cpp delete mode 100644 lib/VMCore/Verifier.cpp (limited to 'lib') diff --git a/lib/Analysis/AliasAnalysis.cpp b/lib/Analysis/AliasAnalysis.cpp index 752edd52b454..210b80ab63ef 100644 --- a/lib/Analysis/AliasAnalysis.cpp +++ b/lib/Analysis/AliasAnalysis.cpp @@ -28,14 +28,14 @@ #include "llvm/Analysis/CaptureTracking.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" #include "llvm/Pass.h" -#include "llvm/BasicBlock.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; @@ -361,8 +361,28 @@ AliasAnalysis::getModRefInfo(const AtomicRMWInst *RMW, const Location &Loc) { } namespace { + // Conservatively return true. Return false, if there is a single path + // starting from "From" and the path does not reach "To". + static bool hasPath(const BasicBlock *From, const BasicBlock *To) { + const unsigned MaxCheck = 5; + const BasicBlock *Current = From; + for (unsigned I = 0; I < MaxCheck; I++) { + unsigned NumSuccs = Current->getTerminator()->getNumSuccessors(); + if (NumSuccs > 1) + return true; + if (NumSuccs == 0) + return false; + Current = Current->getTerminator()->getSuccessor(0); + if (Current == To) + return true; + } + return true; + } + /// Only find pointer captures which happen before the given instruction. Uses /// the dominator tree to determine whether one instruction is before another. + /// Only support the case where the Value is defined in the same basic block + /// as the given instruction and the use. 
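The hasPath helper added above is a deliberately cheap reachability test: it only follows blocks that have exactly one successor, gives up after five steps, and answers "true" whenever it cannot be sure, so an imprecise answer can only make the capture analysis more conservative. The standalone sketch below models the same bounded walk on a made-up CFG with plain containers instead of LLVM types; it is an illustration of the heuristic, not code from the patch.

// Illustrative model of the bounded single-successor walk; toy CFG, not LLVM IR.
#include <cassert>
#include <map>
#include <string>
#include <vector>

// Conservatively returns true unless a unique-successor chain starting at
// `from` provably fails to reach `to` within `maxSteps` blocks.
static bool hasPathModel(const std::map<std::string, std::vector<std::string>> &succs,
                         const std::string &from, const std::string &to,
                         unsigned maxSteps = 5) {
  std::string cur = from;
  for (unsigned i = 0; i < maxSteps; ++i) {
    auto it = succs.find(cur);
    size_t numSuccs = (it == succs.end()) ? 0 : it->second.size();
    if (numSuccs > 1) return true;   // Branch: give up, stay conservative.
    if (numSuccs == 0) return false; // Dead end: definitely no path.
    cur = it->second[0];
    if (cur == to) return true;      // Reached the target.
  }
  return true;                       // Out of budget: assume a path exists.
}

int main() {
  std::map<std::string, std::vector<std::string>> cfg = {
      {"entry", {"a"}}, {"a", {"b"}}, {"b", {}}, {"c", {"d", "e"}}};
  assert(hasPathModel(cfg, "entry", "b"));  // Straight-line chain reaches "b".
  assert(!hasPathModel(cfg, "entry", "c")); // Chain dead-ends before "c".
  assert(hasPathModel(cfg, "c", "b"));      // Branching: conservatively true.
  return 0;
}

The "entry -> a -> b" chain is the only shape the walk can decide either way; any block with two successors immediately falls back to the conservative answer, which is what keeps the check cheap.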
 struct CapturesBefore : public CaptureTracker {
     CapturesBefore(const Instruction *I, DominatorTree *DT)
       : BeforeHere(I), DT(DT), Captured(false) {}
@@ -372,8 +392,15 @@ namespace {
     bool shouldExplore(Use *U) {
       Instruction *I = cast<Instruction>(U->getUser());
       BasicBlock *BB = I->getParent();
-      if (BeforeHere != I &&
-          (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+      // We explore this usage only if the usage can reach "BeforeHere".
+      // If use is not reachable from entry, there is no need to explore.
+      if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+        return false;
+      // If the value is defined in the same basic block as use and BeforeHere,
+      // there is no need to explore the use if BeforeHere dominates use.
+      // Check whether there is a path from I to BeforeHere.
+      if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+          !hasPath(BB, BeforeHere->getParent()))
         return false;
       return true;
     }
@@ -381,8 +408,11 @@ namespace {
     bool captured(Use *U) {
       Instruction *I = cast<Instruction>(U->getUser());
       BasicBlock *BB = I->getParent();
-      if (BeforeHere != I &&
-          (!DT->isReachableFromEntry(BB) || DT->dominates(BeforeHere, I)))
+      // Same logic as in shouldExplore.
+      if (BeforeHere != I && !DT->isReachableFromEntry(BB))
+        return false;
+      if (BeforeHere != I && DT->dominates(BeforeHere, I) &&
+          !hasPath(BB, BeforeHere->getParent()))
         return false;
       Captured = true;
       return true;
@@ -503,7 +533,7 @@ bool AliasAnalysis::canInstructionRangeModify(const Instruction &I1,
 bool llvm::isNoAliasCall(const Value *V) {
   if (isa<CallInst>(V) || isa<InvokeInst>(V))
     return ImmutableCallSite(cast<Instruction>(V))
-      .paramHasAttr(0, Attributes::NoAlias);
+      .paramHasAttr(0, Attribute::NoAlias);
   return false;
 }
@@ -525,19 +555,3 @@ bool llvm::isIdentifiedObject(const Value *V) {
     return A->hasNoAliasAttr() || A->hasByValAttr();
   return false;
 }
-
-/// isKnownNonNull - Return true if we know that the specified value is never
-/// null.
-bool llvm::isKnownNonNull(const Value *V) {
-  // Alloca never returns null, malloc might.
-  if (isa<AllocaInst>(V)) return true;
-
-  // A byval argument is never null.
-  if (const Argument *A = dyn_cast<Argument>(V))
-    return A->hasByValAttr();
-
-  // Global values are not null unless extern weak.
- if (const GlobalValue *GV = dyn_cast(V)) - return !GV->hasExternalWeakLinkage(); - return false; -} diff --git a/lib/Analysis/AliasAnalysisCounter.cpp b/lib/Analysis/AliasAnalysisCounter.cpp index 9f219f563739..9f4a47c77e03 100644 --- a/lib/Analysis/AliasAnalysisCounter.cpp +++ b/lib/Analysis/AliasAnalysisCounter.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Pass.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/Analysis/AliasAnalysisEvaluator.cpp b/lib/Analysis/AliasAnalysisEvaluator.cpp index ac72983a8d7b..a571463dfe12 100644 --- a/lib/Analysis/AliasAnalysisEvaluator.cpp +++ b/lib/Analysis/AliasAnalysisEvaluator.cpp @@ -17,19 +17,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Pass.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/InstIterator.h" -#include "llvm/Support/CommandLine.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; static cl::opt PrintAll("print-all-alias-modref-info", cl::ReallyHidden); @@ -44,6 +44,8 @@ static cl::opt PrintMod("print-mod", cl::ReallyHidden); static cl::opt PrintRef("print-ref", cl::ReallyHidden); static cl::opt PrintModRef("print-modref", cl::ReallyHidden); +static cl::opt EvalTBAA("evaluate-tbaa", cl::ReallyHidden); + namespace { class AAEval : public FunctionPass { unsigned NoAlias, MayAlias, PartialAlias, MustAlias; @@ -123,6 +125,15 @@ PrintModRefResults(const char *Msg, bool P, CallSite CSA, CallSite CSB, } } +static inline void +PrintLoadStoreResults(const char *Msg, bool P, const Value *V1, + const Value *V2, const Module *M) { + if (P) { + errs() << " " << Msg << ": " << *V1 + << " <-> " << *V2 << '\n'; + } +} + static inline bool isInterestingPointer(Value *V) { return V->getType()->isPointerTy() && !isa(V); @@ -133,6 +144,8 @@ bool AAEval::runOnFunction(Function &F) { SetVector Pointers; SetVector CallSites; + SetVector Loads; + SetVector Stores; for (Function::arg_iterator I = F.arg_begin(), E = F.arg_end(); I != E; ++I) if (I->getType()->isPointerTy()) // Add all pointer arguments. @@ -141,6 +154,10 @@ bool AAEval::runOnFunction(Function &F) { for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) { if (I->getType()->isPointerTy()) // Add all pointer instructions. 
Pointers.insert(&*I); + if (EvalTBAA && isa(&*I)) + Loads.insert(&*I); + if (EvalTBAA && isa(&*I)) + Stores.insert(&*I); Instruction &Inst = *I; if (CallSite CS = cast(&Inst)) { Value *Callee = CS.getCalledValue(); @@ -197,6 +214,61 @@ bool AAEval::runOnFunction(Function &F) { } } + if (EvalTBAA) { + // iterate over all pairs of load, store + for (SetVector::iterator I1 = Loads.begin(), E = Loads.end(); + I1 != E; ++I1) { + for (SetVector::iterator I2 = Stores.begin(), E2 = Stores.end(); + I2 != E2; ++I2) { + switch (AA.alias(AA.getLocation(cast(*I1)), + AA.getLocation(cast(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + + // iterate over all pairs of store, store + for (SetVector::iterator I1 = Stores.begin(), E = Stores.end(); + I1 != E; ++I1) { + for (SetVector::iterator I2 = Stores.begin(); I2 != I1; ++I2) { + switch (AA.alias(AA.getLocation(cast(*I1)), + AA.getLocation(cast(*I2)))) { + case AliasAnalysis::NoAlias: + PrintLoadStoreResults("NoAlias", PrintNoAlias, *I1, *I2, + F.getParent()); + ++NoAlias; break; + case AliasAnalysis::MayAlias: + PrintLoadStoreResults("MayAlias", PrintMayAlias, *I1, *I2, + F.getParent()); + ++MayAlias; break; + case AliasAnalysis::PartialAlias: + PrintLoadStoreResults("PartialAlias", PrintPartialAlias, *I1, *I2, + F.getParent()); + ++PartialAlias; break; + case AliasAnalysis::MustAlias: + PrintLoadStoreResults("MustAlias", PrintMustAlias, *I1, *I2, + F.getParent()); + ++MustAlias; break; + } + } + } + } + // Mod/ref alias analysis: compare all pairs of calls and values for (SetVector::iterator C = CallSites.begin(), Ce = CallSites.end(); C != Ce; ++C) { diff --git a/lib/Analysis/AliasDebugger.cpp b/lib/Analysis/AliasDebugger.cpp index f15c05153e10..f6178e36f0a9 100644 --- a/lib/Analysis/AliasDebugger.cpp +++ b/lib/Analysis/AliasDebugger.cpp @@ -17,12 +17,12 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include using namespace llvm; diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 388c755cbd31..591052671d6e 100644 --- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -13,13 +13,13 @@ #include "llvm/Analysis/AliasSetTracker.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" -#include "llvm/DataLayout.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" #include 
"llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" diff --git a/lib/Analysis/Analysis.cpp b/lib/Analysis/Analysis.cpp index 9dc81a6a630f..66e416cd140c 100644 --- a/lib/Analysis/Analysis.cpp +++ b/lib/Analysis/Analysis.cpp @@ -9,8 +9,8 @@ #include "llvm-c/Analysis.h" #include "llvm-c/Initialization.h" -#include "llvm/InitializePasses.h" #include "llvm/Analysis/Verifier.h" +#include "llvm/InitializePasses.h" #include using namespace llvm; @@ -31,7 +31,6 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeCFGPrinterPass(Registry); initializeCFGOnlyViewerPass(Registry); initializeCFGOnlyPrinterPass(Registry); - initializePrintDbgInfoPass(Registry); initializeDependenceAnalysisPass(Registry); initializeDominanceFrontierPass(Registry); initializeDomViewerPass(Registry); @@ -70,6 +69,7 @@ void llvm::initializeAnalysis(PassRegistry &Registry) { initializeRegionOnlyPrinterPass(Registry); initializeScalarEvolutionPass(Registry); initializeScalarEvolutionAliasAnalysisPass(Registry); + initializeTargetTransformInfoAnalysisGroup(Registry); initializeTypeBasedAliasAnalysisPass(Registry); } diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index 4bb93ee88a49..ae6da1af0c4f 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -13,28 +13,28 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" -#include "llvm/Pass.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" -#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" +#include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Target/TargetLibraryInfo.h" #include using namespace llvm; @@ -88,7 +88,7 @@ static uint64_t getObjectSize(const Value *V, const DataLayout &TD, const TargetLibraryInfo &TLI, bool RoundToAlign = false) { uint64_t Size; - if (getObjectSize(V, Size, &TD, &TLI, RoundToAlign)) + if (getUnderlyingObjectSize(V, Size, &TD, &TLI, RoundToAlign)) return Size; return AliasAnalysis::UnknownSize; } @@ -631,7 +631,7 @@ BasicAliasAnalysis::getModRefBehavior(const Function *F) { // For intrinsics, we can check the table. 
if (unsigned iid = F->getIntrinsicID()) { #define GET_INTRINSIC_MODREF_BEHAVIOR -#include "llvm/Intrinsics.gen" +#include "llvm/IR/Intrinsics.gen" #undef GET_INTRINSIC_MODREF_BEHAVIOR } @@ -851,9 +851,13 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, // pointers, figure out if the indexes to the GEP tell us anything about the // derived pointer. if (const GEPOperator *GEP2 = dyn_cast(V2)) { + // Do the base pointers alias? + AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, + UnderlyingV2, UnknownSize, 0); + // Check for geps of non-aliasing underlying pointers where the offsets are // identical. - if (V1Size == V2Size) { + if ((BaseAlias == MayAlias) && V1Size == V2Size) { // Do the base pointers alias assuming type and size. AliasResult PreciseBaseAlias = aliasCheck(UnderlyingV1, V1Size, V1TBAAInfo, UnderlyingV2, @@ -881,10 +885,6 @@ BasicAliasAnalysis::aliasGEP(const GEPOperator *GEP1, uint64_t V1Size, GEP1VariableIndices.clear(); } } - - // Do the base pointers alias? - AliasResult BaseAlias = aliasCheck(UnderlyingV1, UnknownSize, 0, - UnderlyingV2, UnknownSize, 0); // If we get a No or May, then return it immediately, no amount of analysis // will improve this situation. @@ -1064,39 +1064,20 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, Location(V2, V2Size, V2TBAAInfo)); if (PN > V2) std::swap(Locs.first, Locs.second); - - AliasResult Alias = - aliasCheck(PN->getIncomingValue(0), PNSize, PNTBAAInfo, - PN2->getIncomingValueForBlock(PN->getIncomingBlock(0)), - V2Size, V2TBAAInfo); - if (Alias == MayAlias) - return MayAlias; - - // If the first source of the PHI nodes NoAlias and the other inputs are - // the PHI node itself through some amount of recursion this does not add - // any new information so just return NoAlias. - // bb: - // ptr = ptr2 + 1 - // loop: - // ptr_phi = phi [bb, ptr], [loop, ptr_plus_one] - // ptr2_phi = phi [bb, ptr2], [loop, ptr2_plus_one] - // ... - // ptr_plus_one = gep ptr_phi, 1 - // ptr2_plus_one = gep ptr2_phi, 1 - // We assume for the recursion that the the phis (ptr_phi, ptr2_phi) do - // not alias each other. - bool ArePhisAssumedNoAlias = false; - AliasResult OrigAliasResult = NoAlias; - if (Alias == NoAlias) { - // Pretend the phis do not alias. - assert(AliasCache.count(Locs) && - "There must exist an entry for the phi node"); - OrigAliasResult = AliasCache[Locs]; - AliasCache[Locs] = NoAlias; - ArePhisAssumedNoAlias = true; - } - - for (unsigned i = 1, e = PN->getNumIncomingValues(); i != e; ++i) { + // Analyse the PHIs' inputs under the assumption that the PHIs are + // NoAlias. + // If the PHIs are May/MustAlias there must be (recursively) an input + // operand from outside the PHIs' cycle that is MayAlias/MustAlias or + // there must be an operation on the PHIs within the PHIs' value cycle + // that causes a MayAlias. + // Pretend the phis do not alias. + AliasResult Alias = NoAlias; + assert(AliasCache.count(Locs) && + "There must exist an entry for the phi node"); + AliasResult OrigAliasResult = AliasCache[Locs]; + AliasCache[Locs] = NoAlias; + + for (unsigned i = 0, e = PN->getNumIncomingValues(); i != e; ++i) { AliasResult ThisAlias = aliasCheck(PN->getIncomingValue(i), PNSize, PNTBAAInfo, PN2->getIncomingValueForBlock(PN->getIncomingBlock(i)), @@ -1107,7 +1088,7 @@ BasicAliasAnalysis::aliasPHI(const PHINode *PN, uint64_t PNSize, } // Reset if speculation failed. 
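The reworked aliasPHI logic above follows a speculate-and-rollback pattern: the cache entry for the PHI pair is seeded as NoAlias, every pair of incoming values is then checked under that assumption, and the entry is restored if any input turns out to alias. The minimal sketch below shows the same caching pattern with made-up integer node ids instead of LLVM Values, and it simply overwrites the seed with the final answer rather than restoring the caller's placeholder as the real code does; it is an illustration, not the patch's API.

// Sketch of optimistic (NoAlias) seeding with rollback on failure.
#include <algorithm>
#include <cassert>
#include <map>
#include <utility>
#include <vector>

enum Result { NoAlias, MayAlias };

struct PairCheck {
  std::map<std::pair<int, int>, Result> cache;            // memo + optimistic seeds
  std::map<int, std::vector<std::pair<int, int>>> inputs; // node -> input pairs

  Result check(int a, int b) {
    std::pair<int, int> key = std::minmax(a, b);
    auto it = cache.find(key);
    if (it != cache.end())
      return it->second;          // A cycle lands on the optimistic seed.

    cache[key] = NoAlias;         // Speculate: pretend the pair does not alias.

    Result r = NoAlias;
    for (const auto &in : inputs[key.first]) // Verify every input pair.
      if (check(in.first, in.second) != NoAlias) { r = MayAlias; break; }

    cache[key] = r;               // Keep the proof, or record the failure.
    return r;
  }
};

int main() {
  PairCheck pc;
  // Two mutually recursive pairs, like two PHIs feeding each other: with no
  // outside MayAlias input, the speculation succeeds for both.
  pc.inputs[1] = {{3, 4}};
  pc.inputs[3] = {{1, 2}};
  assert(pc.check(1, 2) == NoAlias);
  return 0;
}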
- if (ArePhisAssumedNoAlias && Alias != NoAlias) + if (Alias != NoAlias) AliasCache[Locs] = OrigAliasResult; return Alias; diff --git a/lib/Analysis/BlockFrequencyInfo.cpp b/lib/Analysis/BlockFrequencyInfo.cpp index 8a660f737c9b..100e5c8ae7dd 100644 --- a/lib/Analysis/BlockFrequencyInfo.cpp +++ b/lib/Analysis/BlockFrequencyInfo.cpp @@ -11,12 +11,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/lib/Analysis/BranchProbabilityInfo.cpp b/lib/Analysis/BranchProbabilityInfo.cpp index 04a6560262cb..6c5885601fa3 100644 --- a/lib/Analysis/BranchProbabilityInfo.cpp +++ b/lib/Analysis/BranchProbabilityInfo.cpp @@ -11,14 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/Analysis/LoopInfo.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/Analysis/LoopInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/CFGPrinter.cpp b/lib/Analysis/CFGPrinter.cpp index 76854000bd23..9b6879a42ed4 100644 --- a/lib/Analysis/CFGPrinter.cpp +++ b/lib/Analysis/CFGPrinter.cpp @@ -18,7 +18,6 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CFGPrinter.h" - #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index b3a40bee4211..597c767a8e04 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -13,12 +13,10 @@ add_llvm_library(LLVMAnalysis CostModel.cpp CodeMetrics.cpp ConstantFolding.cpp - DbgInfoPrinter.cpp DependenceAnalysis.cpp DomPrinter.cpp DominanceFrontier.cpp IVUsers.cpp - InlineCost.cpp InstCount.cpp InstructionSimplify.cpp Interval.cpp @@ -47,6 +45,7 @@ add_llvm_library(LLVMAnalysis ProfileVerifierPass.cpp ProfileDataLoader.cpp ProfileDataLoaderPass.cpp + PtrUseVisitor.cpp RegionInfo.cpp RegionPass.cpp RegionPrinter.cpp @@ -55,6 +54,7 @@ add_llvm_library(LLVMAnalysis ScalarEvolutionExpander.cpp ScalarEvolutionNormalization.cpp SparsePropagation.cpp + TargetTransformInfo.cpp Trace.cpp TypeBasedAliasAnalysis.cpp ValueTracking.cpp diff --git a/lib/Analysis/CaptureTracking.cpp b/lib/Analysis/CaptureTracking.cpp index d9c02990a801..a7292706dfa8 100644 --- a/lib/Analysis/CaptureTracking.cpp +++ b/lib/Analysis/CaptureTracking.cpp @@ -18,7 +18,12 @@ #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" + using namespace llvm; CaptureTracker::~CaptureTracker() {} diff --git a/lib/Analysis/CodeMetrics.cpp b/lib/Analysis/CodeMetrics.cpp index 651a54be1b9e..8cda01a24c0d 100644 
--- a/lib/Analysis/CodeMetrics.cpp +++ b/lib/Analysis/CodeMetrics.cpp @@ -12,121 +12,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CodeMetrics.h" -#include "llvm/Function.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CallSite.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/DataLayout.h" using namespace llvm; -/// callIsSmall - If a call is likely to lower to a single target instruction, -/// or is otherwise deemed small return true. -/// TODO: Perhaps calls like memcpy, strcpy, etc? -bool llvm::callIsSmall(ImmutableCallSite CS) { - if (isa(CS.getInstruction())) - return true; - - const Function *F = CS.getCalledFunction(); - if (!F) return false; - - if (F->hasLocalLinkage()) return false; - - if (!F->hasName()) return false; - - StringRef Name = F->getName(); - - // These will all likely lower to a single selection DAG node. - if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || - Name == "fabs" || Name == "fabsf" || Name == "fabsl" || - Name == "sin" || Name == "sinf" || Name == "sinl" || - Name == "cos" || Name == "cosf" || Name == "cosl" || - Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl" ) - return true; - - // These are all likely to be optimized into something smaller. - if (Name == "pow" || Name == "powf" || Name == "powl" || - Name == "exp2" || Name == "exp2l" || Name == "exp2f" || - Name == "floor" || Name == "floorf" || Name == "ceil" || - Name == "round" || Name == "ffs" || Name == "ffsl" || - Name == "abs" || Name == "labs" || Name == "llabs") - return true; - - return false; -} - -bool llvm::isInstructionFree(const Instruction *I, const DataLayout *TD) { - if (isa(I)) - return true; - - // If a GEP has all constant indices, it will probably be folded with - // a load/store. - if (const GetElementPtrInst *GEP = dyn_cast(I)) - return GEP->hasAllConstantIndices(); - - if (const IntrinsicInst *II = dyn_cast(I)) { - switch (II->getIntrinsicID()) { - default: - return false; - case Intrinsic::dbg_declare: - case Intrinsic::dbg_value: - case Intrinsic::invariant_start: - case Intrinsic::invariant_end: - case Intrinsic::lifetime_start: - case Intrinsic::lifetime_end: - case Intrinsic::objectsize: - case Intrinsic::ptr_annotation: - case Intrinsic::var_annotation: - // These intrinsics don't count as size. - return true; - } - } - - if (const CastInst *CI = dyn_cast(I)) { - // Noop casts, including ptr <-> int, don't count. - if (CI->isLosslessCast()) - return true; - - Value *Op = CI->getOperand(0); - // An inttoptr cast is free so long as the input is a legal integer type - // which doesn't contain values outside the range of a pointer. - if (isa(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() <= TD->getPointerSizeInBits()) - return true; - - // A ptrtoint cast is free so long as the result is large enough to store - // the pointer, and a legal integer type. - if (isa(CI) && TD && - TD->isLegalInteger(Op->getType()->getScalarSizeInBits()) && - Op->getType()->getScalarSizeInBits() >= TD->getPointerSizeInBits()) - return true; - - // trunc to a native type is free (assuming the target has compare and - // shift-right of the same width). 
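The cast rules deleted in this hunk are plain width comparisons: an inttoptr is treated as free when its integer source is a legal type no wider than a pointer, and a ptrtoint is free when its integer result is a legal type at least as wide as a pointer; further down the hunk, CodeMetrics defers this kind of judgement to TTI.getUserCost instead. A small sketch of the two predicates with made-up target parameters (64-bit pointers, power-of-two legal widths), for illustration only:

// Restatement of the removed free-cast width rules; target values are hypothetical.
#include <cassert>
#include <initializer_list>

static bool isLegalInteger(unsigned bits) {
  for (unsigned w : {8u, 16u, 32u, 64u}) // hypothetical legal integer widths
    if (bits == w) return true;
  return false;
}

static const unsigned PointerBits = 64;  // hypothetical pointer width

// inttoptr is "free" when the source cannot hold values outside pointer range.
static bool isFreeIntToPtr(unsigned srcBits) {
  return isLegalInteger(srcBits) && srcBits <= PointerBits;
}

// ptrtoint is "free" when the result is large enough to store the pointer.
static bool isFreePtrToInt(unsigned dstBits) {
  return isLegalInteger(dstBits) && dstBits >= PointerBits;
}

int main() {
  assert(isFreeIntToPtr(32));    // fits in a 64-bit pointer
  assert(!isFreeIntToPtr(128));  // wider than a pointer, not legal here
  assert(isFreePtrToInt(64));    // can hold the whole pointer
  assert(!isFreePtrToInt(32));   // would truncate the pointer
  return 0;
}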
- if (TD && isa(CI) && - TD->isLegalInteger(TD->getTypeSizeInBits(CI->getType()))) - return true; - // Result of a cmp instruction is often extended (to be used by other - // cmp instructions, logical or return instructions). These are usually - // nop on most sane targets. - if (isa(CI->getOperand(0))) - return true; - } - - return false; -} - /// analyzeBasicBlock - Fill in the current structure with information gleaned /// from the specified block. void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, - const DataLayout *TD) { + const TargetTransformInfo &TTI) { ++NumBlocks; unsigned NumInstsBeforeThisBB = NumInsts; for (BasicBlock::const_iterator II = BB->begin(), E = BB->end(); II != E; ++II) { - if (isInstructionFree(II, TD)) - continue; - // Special handling for calls. if (isa(II) || isa(II)) { ImmutableCallSite CS(cast(II)); @@ -144,12 +45,10 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // for that case. if (F == BB->getParent()) isRecursive = true; - } - - if (!callIsSmall(CS)) { - // Each argument to a call takes on average one instruction to set up. - NumInsts += CS.arg_size(); + if (TTI.isLoweredToCall(F)) + ++NumCalls; + } else { // We don't want inline asm to count as a call - that would prevent loop // unrolling. The argument setup cost is still real, though. if (!isa(CS.getCalledValue())) @@ -165,7 +64,15 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, if (isa(II) || II->getType()->isVectorTy()) ++NumVectorInsts; - ++NumInsts; + if (const CallInst *CI = dyn_cast(II)) + if (CI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + if (const InvokeInst *InvI = dyn_cast(II)) + if (InvI->hasFnAttr(Attribute::NoDuplicate)) + notDuplicatable = true; + + NumInsts += TTI.getUserCost(&*II); } if (isa(BB->getTerminator())) @@ -182,23 +89,8 @@ void CodeMetrics::analyzeBasicBlock(const BasicBlock *BB, // if someone is using a blockaddress without an indirectbr, and that // reference somehow ends up in another function or global, we probably // don't want to inline this function. - if (isa(BB->getTerminator())) - containsIndirectBr = true; + notDuplicatable |= isa(BB->getTerminator()); // Remember NumInsts for this BB. NumBBInsts[BB] = NumInsts - NumInstsBeforeThisBB; } - -void CodeMetrics::analyzeFunction(Function *F, const DataLayout *TD) { - // If this function contains a call that "returns twice" (e.g., setjmp or - // _setjmp) and it isn't marked with "returns twice" itself, never inline it. - // This is a hack because we depend on the user marking their local variables - // as volatile if they are live across a setjmp call, and they probably - // won't do this in callers. - exposesReturnsTwice = F->callsFunctionThatReturnsTwice() && - !F->getFnAttributes().hasAttribute(Attributes::ReturnsTwice); - - // Look at the size of the callee. - for (Function::const_iterator BB = F->begin(), E = F->end(); BB != E; ++BB) - analyzeBasicBlock(&*BB, TD); -} diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 91a5b84e8a63..09d7608c51da 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -9,30 +9,30 @@ // // This file defines routines for folding instructions into constants. // -// Also, to supplement the basic VMCore ConstantExpr simplifications, +// Also, to supplement the basic IR ConstantExpr simplifications, // this file defines some additional folding routines that can make use of -// DataLayout information. 
These functions cannot go in VMCore due to library +// DataLayout information. These functions cannot go in IR due to library // dependency issues. // //===----------------------------------------------------------------------===// #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Operator.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FEnv.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/FEnv.h" +#include "llvm/Target/TargetLibraryInfo.h" #include #include using namespace llvm; @@ -54,13 +54,12 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Handle a vector->integer cast. if (IntegerType *IT = dyn_cast(DestTy)) { - ConstantDataVector *CDV = dyn_cast(C); - if (CDV == 0) + VectorType *VTy = dyn_cast(C->getType()); + if (VTy == 0) return ConstantExpr::getBitCast(C, DestTy); - unsigned NumSrcElts = CDV->getType()->getNumElements(); - - Type *SrcEltTy = CDV->getType()->getElementType(); + unsigned NumSrcElts = VTy->getNumElements(); + Type *SrcEltTy = VTy->getElementType(); // If the vector is a vector of floating point, convert it to vector of int // to simplify things. @@ -68,11 +67,14 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElts); - // Ask VMCore to do the conversion now that #elts line up. + // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - CDV = cast(C); } + ConstantDataVector *CDV = dyn_cast(C); + if (CDV == 0) + return ConstantExpr::getBitCast(C, DestTy); + // Now that we know that the input value is a vector of integers, just shift // and insert them into our result. unsigned BitShift = TD.getTypeAllocSizeInBits(SrcEltTy); @@ -104,7 +106,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, if (!isa(C) && !isa(C)) return ConstantExpr::getBitCast(C, DestTy); - // If the element types match, VMCore can fold it. + // If the element types match, IR can fold it. unsigned NumDstElt = DestVTy->getNumElements(); unsigned NumSrcElt = C->getType()->getVectorNumElements(); if (NumDstElt == NumSrcElt) @@ -131,7 +133,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, // Recursively handle this integer conversion, if possible. C = FoldBitCast(C, DestIVTy, TD); - // Finally, VMCore can handle this now that #elts line up. + // Finally, IR can handle this now that #elts line up. return ConstantExpr::getBitCast(C, DestTy); } @@ -141,9 +143,9 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, unsigned FPWidth = SrcEltTy->getPrimitiveSizeInBits(); Type *SrcIVTy = VectorType::get(IntegerType::get(C->getContext(), FPWidth), NumSrcElt); - // Ask VMCore to do the conversion now that #elts line up. 
+ // Ask IR to do the conversion now that #elts line up. C = ConstantExpr::getBitCast(C, SrcIVTy); - // If VMCore wasn't able to fold it, bail out. + // If IR wasn't able to fold it, bail out. if (!isa(C) && // FIXME: Remove ConstantVector. !isa(C)) return C; @@ -218,10 +220,10 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, /// from a global, return the global and the constant. Because of /// constantexprs, this function is recursive. static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, - int64_t &Offset, const DataLayout &TD) { + APInt &Offset, const DataLayout &TD) { // Trivial case, constant is the global. if ((GV = dyn_cast(C))) { - Offset = 0; + Offset.clearAllBits(); return true; } @@ -235,34 +237,13 @@ static bool IsConstantOffsetFromGlobal(Constant *C, GlobalValue *&GV, return IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD); // i32* getelementptr ([5 x i32]* @a, i32 0, i32 5) - if (CE->getOpcode() == Instruction::GetElementPtr) { - // Cannot compute this if the element type of the pointer is missing size - // info. - if (!cast(CE->getOperand(0)->getType()) - ->getElementType()->isSized()) - return false; - + if (GEPOperator *GEP = dyn_cast(CE)) { // If the base isn't a global+constant, we aren't either. if (!IsConstantOffsetFromGlobal(CE->getOperand(0), GV, Offset, TD)) return false; // Otherwise, add any offset that our operands provide. - gep_type_iterator GTI = gep_type_begin(CE); - for (User::const_op_iterator i = CE->op_begin() + 1, e = CE->op_end(); - i != e; ++i, ++GTI) { - ConstantInt *CI = dyn_cast(*i); - if (!CI) return false; // Index isn't a simple constant? - if (CI->isZero()) continue; // Not adding anything. - - if (StructType *ST = dyn_cast(*GTI)) { - // N = N + Offset - Offset += TD.getStructLayout(ST)->getElementOffset(CI->getZExtValue()); - } else { - SequentialType *SQT = cast(*GTI); - Offset += TD.getTypeAllocSize(SQT->getElementType())*CI->getSExtValue(); - } - } - return true; + return GEP->accumulateConstantOffset(TD, Offset); } return false; @@ -310,6 +291,10 @@ static bool ReadDataFromGlobal(Constant *C, uint64_t ByteOffset, C = FoldBitCast(C, Type::getInt32Ty(C->getContext()), TD); return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); } + if (CFP->getType()->isHalfTy()){ + C = FoldBitCast(C, Type::getInt16Ty(C->getContext()), TD); + return ReadDataFromGlobal(C, ByteOffset, CurPtr, BytesLeft, TD); + } return false; } @@ -402,7 +387,9 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // that address spaces don't matter here since we're not going to result in // an actual new load. Type *MapTy; - if (LoadTy->isFloatTy()) + if (LoadTy->isHalfTy()) + MapTy = Type::getInt16PtrTy(C->getContext()); + else if (LoadTy->isFloatTy()) MapTy = Type::getInt32PtrTy(C->getContext()); else if (LoadTy->isDoubleTy()) MapTy = Type::getInt64PtrTy(C->getContext()); @@ -423,7 +410,7 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, if (BytesLoaded > 32 || BytesLoaded == 0) return 0; GlobalValue *GVal; - int64_t Offset; + APInt Offset(TD.getPointerSizeInBits(), 0); if (!IsConstantOffsetFromGlobal(C, GVal, Offset, TD)) return 0; @@ -434,14 +421,15 @@ static Constant *FoldReinterpretLoadFromConstPtr(Constant *C, // If we're loading off the beginning of the global, some bytes may be valid, // but we don't try to handle this. - if (Offset < 0) return 0; + if (Offset.isNegative()) return 0; // If we're not accessing anything in this constant, the result is undefined. 
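IsConstantOffsetFromGlobal now carries the offset in an APInt sized to the pointer width and defers the index arithmetic to GEPOperator::accumulateConstantOffset. A standalone sketch of that calling convention (getConstantByteOffset is an illustrative helper, not part of the patch):

#include "llvm/ADT/APInt.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Operator.h"

using namespace llvm;

// Fold the indices of a constant GEP into a byte offset. The APInt must be
// created with the pointer width up front; accumulateConstantOffset returns
// false if any index is not a simple constant.
static bool getConstantByteOffset(GEPOperator *GEP, const DataLayout &TD,
                                  APInt &Offset) {
  Offset = APInt(TD.getPointerSizeInBits(), 0);
  return GEP->accumulateConstantOffset(TD, Offset);
}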
- if (uint64_t(Offset) >= TD.getTypeAllocSize(GV->getInitializer()->getType())) + if (Offset.getZExtValue() >= + TD.getTypeAllocSize(GV->getInitializer()->getType())) return UndefValue::get(IntType); unsigned char RawBytes[32] = {0}; - if (!ReadDataFromGlobal(GV->getInitializer(), Offset, RawBytes, + if (!ReadDataFromGlobal(GV->getInitializer(), Offset.getZExtValue(), RawBytes, BytesLoaded, TD)) return 0; @@ -550,10 +538,10 @@ static Constant *ConstantFoldLoadInst(const LoadInst *LI, const DataLayout *TD){ /// SymbolicallyEvaluateBinop - One of Op0/Op1 is a constant expression. /// Attempt to symbolically evaluate the result of a binary operator merging -/// these together. If target data info is available, it is provided as TD, -/// otherwise TD is null. +/// these together. If target data info is available, it is provided as DL, +/// otherwise DL is null. static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, - Constant *Op1, const DataLayout *TD){ + Constant *Op1, const DataLayout *DL){ // SROA // Fold (and 0xffffffff00000000, (shl x, 32)) -> shl. @@ -561,17 +549,44 @@ static Constant *SymbolicallyEvaluateBinop(unsigned Opc, Constant *Op0, // bits. + if (Opc == Instruction::And && DL) { + unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType()); + APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0); + APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0); + ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL); + ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL); + if ((KnownOne1 | KnownZero0).isAllOnesValue()) { + // All the bits of Op0 that the 'and' could be masking are already zero. + return Op0; + } + if ((KnownOne0 | KnownZero1).isAllOnesValue()) { + // All the bits of Op1 that the 'and' could be masking are already zero. + return Op1; + } + + APInt KnownZero = KnownZero0 | KnownZero1; + APInt KnownOne = KnownOne0 & KnownOne1; + if ((KnownZero | KnownOne).isAllOnesValue()) { + return ConstantInt::get(Op0->getType(), KnownOne); + } + } + // If the constant expr is something like &A[123] - &A[4].f, fold this into a // constant. This happens frequently when iterating over a global array. - if (Opc == Instruction::Sub && TD) { + if (Opc == Instruction::Sub && DL) { GlobalValue *GV1, *GV2; - int64_t Offs1, Offs2; + unsigned PtrSize = DL->getPointerSizeInBits(); + unsigned OpSize = DL->getTypeSizeInBits(Op0->getType()); + APInt Offs1(PtrSize, 0), Offs2(PtrSize, 0); - if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *TD)) - if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *TD) && + if (IsConstantOffsetFromGlobal(Op0, GV1, Offs1, *DL)) + if (IsConstantOffsetFromGlobal(Op1, GV2, Offs2, *DL) && GV1 == GV2) { // (&GV+C1) - (&GV+C2) -> C1-C2, pointer arithmetic cannot overflow. - return ConstantInt::get(Op0->getType(), Offs1-Offs2); + // PtrToInt may change the bitwidth so we have convert to the right size + // first. 
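The new handling of 'and' above uses ComputeMaskedBits to prove that one operand's mask cannot clear anything the other operand might set. A condensed sketch of that reasoning, assuming a non-null DataLayout (foldAndViaKnownBits is an illustrative name):

#include "llvm/ADT/APInt.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/DataLayout.h"
#include "llvm/IR/Value.h"

using namespace llvm;

// Example: for (and 0xffffffff00000000, (shl x, 32)) the shift's known-zero
// low 32 bits union the constant's known-one high 32 bits cover every bit,
// so the 'and' simplifies to the shift itself.
static Value *foldAndViaKnownBits(Value *Op0, Value *Op1,
                                  const DataLayout *DL) {
  unsigned BitWidth = DL->getTypeSizeInBits(Op0->getType());
  APInt KnownZero0(BitWidth, 0), KnownOne0(BitWidth, 0);
  APInt KnownZero1(BitWidth, 0), KnownOne1(BitWidth, 0);
  ComputeMaskedBits(Op0, KnownZero0, KnownOne0, DL);
  ComputeMaskedBits(Op1, KnownZero1, KnownOne1, DL);
  if ((KnownOne1 | KnownZero0).isAllOnesValue())
    return Op0; // Op1 only clears bits already known zero in Op0.
  if ((KnownOne0 | KnownZero1).isAllOnesValue())
    return Op1; // And vice versa.
  return 0;     // No simplification found.
}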
+ return ConstantInt::get(Op0->getType(), Offs1.zextOrTrunc(OpSize) - + Offs2.zextOrTrunc(OpSize)); } } @@ -1104,6 +1119,13 @@ Constant *llvm::ConstantFoldLoadThroughGEPIndices(Constant *C, bool llvm::canConstantFoldCallTo(const Function *F) { switch (F->getIntrinsicID()) { + case Intrinsic::fabs: + case Intrinsic::log: + case Intrinsic::log2: + case Intrinsic::log10: + case Intrinsic::exp: + case Intrinsic::exp2: + case Intrinsic::floor: case Intrinsic::sqrt: case Intrinsic::pow: case Intrinsic::powi: @@ -1142,8 +1164,7 @@ llvm::canConstantFoldCallTo(const Function *F) { switch (Name[0]) { default: return false; case 'a': - return Name == "acos" || Name == "asin" || - Name == "atan" || Name == "atan2"; + return Name == "acos" || Name == "asin" || Name == "atan" || Name =="atan2"; case 'c': return Name == "cos" || Name == "ceil" || Name == "cosf" || Name == "cosh"; case 'e': @@ -1171,11 +1192,17 @@ static Constant *ConstantFoldFP(double (*NativeFP)(double), double V, return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), @@ -1187,11 +1214,17 @@ static Constant *ConstantFoldBinaryFP(double (*NativeFP)(double, double), return 0; } + if (Ty->isHalfTy()) { + APFloat APF(V); + bool unused; + APF.convert(APFloat::IEEEhalf, APFloat::rmNearestTiesToEven, &unused); + return ConstantFP::get(Ty->getContext(), APF); + } if (Ty->isFloatTy()) return ConstantFP::get(Ty->getContext(), APFloat((float)V)); if (Ty->isDoubleTy()) return ConstantFP::get(Ty->getContext(), APFloat(V)); - llvm_unreachable("Can only constant fold float/double"); + llvm_unreachable("Can only constant fold half/float/double"); } /// ConstantFoldConvertToInt - Attempt to an SSE floating point to integer @@ -1243,7 +1276,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (!TLI) return 0; - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; /// We only fold functions with finite arguments. Folding NaN and inf is @@ -1256,8 +1289,46 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, /// the host native double versions. Float versions are not called /// directly but for all these it is true (float)(f((double)arg)) == /// f(arg). Long double not supported yet. - double V = Ty->isFloatTy() ? 
(double)Op->getValueAPF().convertToFloat() : - Op->getValueAPF().convertToDouble(); + double V; + if (Ty->isFloatTy()) + V = Op->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + V = Op->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + V = APF.convertToDouble(); + } + + switch (F->getIntrinsicID()) { + default: break; + case Intrinsic::fabs: + return ConstantFoldFP(fabs, V, Ty); +#if HAVE_LOG2 + case Intrinsic::log2: + return ConstantFoldFP(log2, V, Ty); +#endif +#if HAVE_LOG + case Intrinsic::log: + return ConstantFoldFP(log, V, Ty); +#endif +#if HAVE_LOG10 + case Intrinsic::log10: + return ConstantFoldFP(log10, V, Ty); +#endif +#if HAVE_EXP + case Intrinsic::exp: + return ConstantFoldFP(exp, V, Ty); +#endif +#if HAVE_EXP2 + case Intrinsic::exp2: + return ConstantFoldFP(exp2, V, Ty); +#endif + case Intrinsic::floor: + return ConstantFoldFP(floor, V, Ty); + } + switch (Name[0]) { case 'a': if (Name == "acos" && TLI->has(LibFunc::acos)) @@ -1299,7 +1370,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, else if (Name == "log10" && V > 0 && TLI->has(LibFunc::log10)) return ConstantFoldFP(log10, V, Ty); else if (F->getIntrinsicID() == Intrinsic::sqrt && - (Ty->isFloatTy() || Ty->isDoubleTy())) { + (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy())) { if (V >= -0.0) return ConstantFoldFP(sqrt, V, Ty); else // Undefined @@ -1337,7 +1408,7 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, case Intrinsic::ctpop: return ConstantInt::get(Ty, Op->getValue().countPopulation()); case Intrinsic::convert_from_fp16: { - APFloat Val(Op->getValue()); + APFloat Val(APFloat::IEEEhalf, Op->getValue()); bool lost = false; APFloat::opStatus status = @@ -1391,18 +1462,35 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (Operands.size() == 2) { if (ConstantFP *Op1 = dyn_cast(Operands[0])) { - if (!Ty->isFloatTy() && !Ty->isDoubleTy()) + if (!Ty->isHalfTy() && !Ty->isFloatTy() && !Ty->isDoubleTy()) return 0; - double Op1V = Ty->isFloatTy() ? - (double)Op1->getValueAPF().convertToFloat() : - Op1->getValueAPF().convertToDouble(); + double Op1V; + if (Ty->isFloatTy()) + Op1V = Op1->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op1V = Op1->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op1->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op1V = APF.convertToDouble(); + } + if (ConstantFP *Op2 = dyn_cast(Operands[1])) { if (Op2->getType() != Op1->getType()) return 0; - double Op2V = Ty->isFloatTy() ? 
- (double)Op2->getValueAPF().convertToFloat(): - Op2->getValueAPF().convertToDouble(); + double Op2V; + if (Ty->isFloatTy()) + Op2V = Op2->getValueAPF().convertToFloat(); + else if (Ty->isDoubleTy()) + Op2V = Op2->getValueAPF().convertToDouble(); + else { + bool unused; + APFloat APF = Op2->getValueAPF(); + APF.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &unused); + Op2V = APF.convertToDouble(); + } if (F->getIntrinsicID() == Intrinsic::pow) { return ConstantFoldBinaryFP(pow, Op1V, Op2V, Ty); @@ -1416,6 +1504,10 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, if (Name == "atan2" && TLI->has(LibFunc::atan2)) return ConstantFoldBinaryFP(atan2, Op1V, Op2V, Ty); } else if (ConstantInt *Op2C = dyn_cast(Operands[1])) { + if (F->getIntrinsicID() == Intrinsic::powi && Ty->isHalfTy()) + return ConstantFP::get(F->getContext(), + APFloat((float)std::pow((float)Op1V, + (int)Op2C->getZExtValue()))); if (F->getIntrinsicID() == Intrinsic::powi && Ty->isFloatTy()) return ConstantFP::get(F->getContext(), APFloat((float)std::pow((float)Op1V, @@ -1468,12 +1560,12 @@ llvm::ConstantFoldCall(Function *F, ArrayRef Operands, return ConstantStruct::get(cast(F->getReturnType()), Ops); } case Intrinsic::cttz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // cttz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countTrailingZeros()); case Intrinsic::ctlz: - // FIXME: This should check for Op2 == 1, and become unreachable if - // Op1 == 0. + if (Op2->isOne() && Op1->isZero()) // ctlz(0, 1) is undef. + return UndefValue::get(Ty); return ConstantInt::get(Ty, Op1->getValue().countLeadingZeros()); } } diff --git a/lib/Analysis/CostModel.cpp b/lib/Analysis/CostModel.cpp index 5adbf458104e..98a7780ad9a6 100644 --- a/lib/Analysis/CostModel.cpp +++ b/lib/Analysis/CostModel.cpp @@ -8,20 +8,24 @@ //===----------------------------------------------------------------------===// // // This file defines the cost model analysis. It provides a very basic cost -// estimation for LLVM-IR. The cost result can be thought of as cycles, but it -// is really unit-less. The estimated cost is ment to be used for comparing -// alternatives. +// estimation for LLVM-IR. This analysis uses the services of the codegen +// to approximate the cost of any IR instruction when lowered to machine +// instructions. The cost results are unit-less and the cost number represents +// the throughput of the machine assuming that all loads hit the cache, all +// branches are predicted, etc. The cost numbers can be added in order to +// compare two or more transformation alternatives. 
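Because the reworked cost model returns unit-less throughput estimates, alternatives can be ranked by simple addition and comparison. A small sketch of that idea, assuming a TargetTransformInfo implementation is available (for example via getAnalysisIfAvailable inside a pass):

#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/Instruction.h"
#include "llvm/IR/LLVMContext.h"

using namespace llvm;

// Is one <4 x i32> multiply expected to be cheaper than four scalar i32
// multiplies on this target? The two totals are directly comparable.
static bool vectorMulLooksCheaper(LLVMContext &Ctx,
                                  const TargetTransformInfo &TTI) {
  Type *I32 = Type::getInt32Ty(Ctx);
  Type *V4I32 = VectorType::get(I32, 4);
  unsigned ScalarCost = TTI.getArithmeticInstrCost(Instruction::Mul, I32);
  unsigned VectorCost = TTI.getArithmeticInstrCost(Instruction::Mul, V4I32);
  return VectorCost < 4 * ScalarCost;
}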
// //===----------------------------------------------------------------------===// #define CM_NAME "cost-model" #define DEBUG_TYPE CM_NAME #include "llvm/Analysis/Passes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Value.h" #include "llvm/Pass.h" -#include "llvm/TargetTransformInfo.h" -#include "llvm/Value.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -31,7 +35,7 @@ namespace { public: static char ID; // Class identification, replacement for typeinfo - CostModelAnalysis() : FunctionPass(ID), F(0), VTTI(0) { + CostModelAnalysis() : FunctionPass(ID), F(0), TTI(0) { initializeCostModelAnalysisPass( *PassRegistry::getPassRegistry()); } @@ -40,7 +44,7 @@ namespace { /// Returns -1 if the cost is unknown. /// Note, this method does not cache the cost calculation and it /// can be expensive in some cases. - unsigned getInstructionCost(Instruction *I) const; + unsigned getInstructionCost(const Instruction *I) const; private: virtual void getAnalysisUsage(AnalysisUsage &AU) const; @@ -49,8 +53,8 @@ namespace { /// The function that we analyze. Function *F; - /// Vector target information. - const VectorTargetTransformInfo *VTTI; + /// Target information. + const TargetTransformInfo *TTI; }; } // End of anonymous namespace @@ -72,25 +76,49 @@ CostModelAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { bool CostModelAnalysis::runOnFunction(Function &F) { this->F = &F; - - // Target information. - TargetTransformInfo *TTI; TTI = getAnalysisIfAvailable(); - if (TTI) - VTTI = TTI->getVectorTargetTransformInfo(); return false; } -unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { - if (!VTTI) +static bool isReverseVectorMask(SmallVector &Mask) { + for (unsigned i = 0, MaskSize = Mask.size(); i < MaskSize; ++i) + if (Mask[i] > 0 && Mask[i] != (int)(MaskSize - 1 - i)) + return false; + return true; +} + +static TargetTransformInfo::OperandValueKind getOperandInfo(Value *V) { + TargetTransformInfo::OperandValueKind OpInfo = + TargetTransformInfo::OK_AnyValue; + + // Check for a splat of a constant. 
+ ConstantDataVector *CDV = 0; + if ((CDV = dyn_cast(V))) + if (CDV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + ConstantVector *CV = 0; + if ((CV = dyn_cast(V))) + if (CV->getSplatValue() != NULL) + OpInfo = TargetTransformInfo::OK_UniformConstantValue; + + return OpInfo; +} + +unsigned CostModelAnalysis::getInstructionCost(const Instruction *I) const { + if (!TTI) return -1; switch (I->getOpcode()) { + case Instruction::GetElementPtr:{ + Type *ValTy = I->getOperand(0)->getType()->getPointerElementType(); + return TTI->getAddressComputationCost(ValTy); + } + case Instruction::Ret: case Instruction::PHI: case Instruction::Br: { - return VTTI->getCFInstrCost(I->getOpcode()); + return TTI->getCFInstrCost(I->getOpcode()); } case Instruction::Add: case Instruction::FAdd: @@ -110,28 +138,33 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { case Instruction::And: case Instruction::Or: case Instruction::Xor: { - return VTTI->getArithmeticInstrCost(I->getOpcode(), I->getType()); + TargetTransformInfo::OperandValueKind Op1VK = + getOperandInfo(I->getOperand(0)); + TargetTransformInfo::OperandValueKind Op2VK = + getOperandInfo(I->getOperand(1)); + return TTI->getArithmeticInstrCost(I->getOpcode(), I->getType(), Op1VK, + Op2VK); } case Instruction::Select: { - SelectInst *SI = cast(I); + const SelectInst *SI = cast(I); Type *CondTy = SI->getCondition()->getType(); - return VTTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), I->getType(), CondTy); } case Instruction::ICmp: case Instruction::FCmp: { Type *ValTy = I->getOperand(0)->getType(); - return VTTI->getCmpSelInstrCost(I->getOpcode(), ValTy); + return TTI->getCmpSelInstrCost(I->getOpcode(), ValTy); } case Instruction::Store: { - StoreInst *SI = cast(I); + const StoreInst *SI = cast(I); Type *ValTy = SI->getValueOperand()->getType(); - return VTTI->getMemoryOpCost(I->getOpcode(), ValTy, + return TTI->getMemoryOpCost(I->getOpcode(), ValTy, SI->getAlignment(), SI->getPointerAddressSpace()); } case Instruction::Load: { - LoadInst *LI = cast(I); - return VTTI->getMemoryOpCost(I->getOpcode(), I->getType(), + const LoadInst *LI = cast(I); + return TTI->getMemoryOpCost(I->getOpcode(), I->getType(), LI->getAlignment(), LI->getPointerAddressSpace()); } @@ -148,26 +181,47 @@ unsigned CostModelAnalysis::getInstructionCost(Instruction *I) const { case Instruction::FPTrunc: case Instruction::BitCast: { Type *SrcTy = I->getOperand(0)->getType(); - return VTTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); + return TTI->getCastInstrCost(I->getOpcode(), I->getType(), SrcTy); } case Instruction::ExtractElement: { - ExtractElementInst * EEI = cast(I); + const ExtractElementInst * EEI = cast(I); ConstantInt *CI = dyn_cast(I->getOperand(1)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); - return VTTI->getVectorInstrCost(I->getOpcode(), - EEI->getOperand(0)->getType(), Idx); + return TTI->getVectorInstrCost(I->getOpcode(), + EEI->getOperand(0)->getType(), Idx); } case Instruction::InsertElement: { - InsertElementInst * IE = cast(I); + const InsertElementInst * IE = cast(I); ConstantInt *CI = dyn_cast(IE->getOperand(2)); unsigned Idx = -1; if (CI) Idx = CI->getZExtValue(); - return VTTI->getVectorInstrCost(I->getOpcode(), - IE->getType(), Idx); + return TTI->getVectorInstrCost(I->getOpcode(), + IE->getType(), Idx); } + case Instruction::ShuffleVector: { + const ShuffleVectorInst *Shuffle = cast(I); + Type *VecTypOp0 = 
Shuffle->getOperand(0)->getType(); + unsigned NumVecElems = VecTypOp0->getVectorNumElements(); + SmallVector Mask = Shuffle->getShuffleMask(); + + if (NumVecElems == Mask.size() && isReverseVectorMask(Mask)) + return TTI->getShuffleCost(TargetTransformInfo::SK_Reverse, VecTypOp0, 0, + 0); + return -1; + } + case Instruction::Call: + if (const IntrinsicInst *II = dyn_cast(I)) { + SmallVector Tys; + for (unsigned J = 0, JE = II->getNumArgOperands(); J != JE; ++J) + Tys.push_back(II->getArgOperand(J)->getType()); + + return TTI->getIntrinsicInstrCost(II->getIntrinsicID(), II->getType(), + Tys); + } + return -1; default: // We don't have any information on this instruction. return -1; diff --git a/lib/Analysis/DbgInfoPrinter.cpp b/lib/Analysis/DbgInfoPrinter.cpp deleted file mode 100644 index 41cd34c07be0..000000000000 --- a/lib/Analysis/DbgInfoPrinter.cpp +++ /dev/null @@ -1,224 +0,0 @@ -//===- DbgInfoPrinter.cpp - Print debug info in a human readable form ------==// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a pass that prints instructions, and associated debug -// info: -// -// - source/line/col information -// - original variable name -// - original type name -// -//===----------------------------------------------------------------------===// - -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" - -using namespace llvm; - -static cl::opt -PrintDirectory("print-fullpath", - cl::desc("Print fullpath when printing debug info"), - cl::Hidden); - -namespace { - class PrintDbgInfo : public FunctionPass { - raw_ostream &Out; - void printVariableDeclaration(const Value *V); - public: - static char ID; // Pass identification - PrintDbgInfo() : FunctionPass(ID), Out(errs()) { - initializePrintDbgInfoPass(*PassRegistry::getPassRegistry()); - } - - virtual bool runOnFunction(Function &F); - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - } - }; - char PrintDbgInfo::ID = 0; -} - -INITIALIZE_PASS(PrintDbgInfo, "print-dbginfo", - "Print debug info in human readable form", false, false) - -FunctionPass *llvm::createDbgInfoPrinterPass() { return new PrintDbgInfo(); } - -/// Find the debug info descriptor corresponding to this global variable. -static Value *findDbgGlobalDeclare(GlobalVariable *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast(NMD->getOperand(i))); - if (!DIG.isGlobalVariable()) - continue; - if (DIGlobalVariable(DIG).getGlobal() == V) - return DIG; - } - return 0; -} - -/// Find the debug info descriptor corresponding to this function. 
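Recapping the ShuffleVector handling added to the cost model above: a mask counts as a whole-vector reverse when element i selects element N-1-i, with negative (undef) entries ignored, and only then is the target asked for its SK_Reverse shuffle cost. A standalone sketch of the intended mask check (looksLikeReverseMask is an illustrative name):

#include "llvm/ADT/SmallVector.h"

using namespace llvm;

// For N == 4, <3, 2, 1, 0> and <3, -1, 1, 0> both qualify; <0, 1, 2, 3>
// does not.
static bool looksLikeReverseMask(const SmallVectorImpl<int> &Mask) {
  unsigned N = Mask.size();
  for (unsigned i = 0; i != N; ++i)
    if (Mask[i] >= 0 && Mask[i] != (int)(N - 1 - i))
      return false;
  return true;
}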
-static Value *findDbgSubprogramDeclare(Function *V) { - const Module *M = V->getParent(); - NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp"); - if (!NMD) - return 0; - - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIDescriptor DIG(cast(NMD->getOperand(i))); - if (!DIG.isSubprogram()) - continue; - if (DISubprogram(DIG).getFunction() == V) - return DIG; - } - return 0; -} - -/// Finds the llvm.dbg.declare intrinsic corresponding to this value if any. -/// It looks through pointer casts too. -static const DbgDeclareInst *findDbgDeclare(const Value *V) { - V = V->stripPointerCasts(); - - if (!isa(V) && !isa(V)) - return 0; - - const Function *F = NULL; - if (const Instruction *I = dyn_cast(V)) - F = I->getParent()->getParent(); - else if (const Argument *A = dyn_cast(V)) - F = A->getParent(); - - for (Function::const_iterator FI = F->begin(), FE = F->end(); FI != FE; ++FI) - for (BasicBlock::const_iterator BI = (*FI).begin(), BE = (*FI).end(); - BI != BE; ++BI) - if (const DbgDeclareInst *DDI = dyn_cast(BI)) - if (DDI->getAddress() == V) - return DDI; - - return 0; -} - -static bool getLocationInfo(const Value *V, std::string &DisplayName, - std::string &Type, unsigned &LineNo, - std::string &File, std::string &Dir) { - DICompileUnit Unit; - DIType TypeD; - - if (GlobalVariable *GV = dyn_cast(const_cast(V))) { - Value *DIGV = findDbgGlobalDeclare(GV); - if (!DIGV) return false; - DIGlobalVariable Var(cast(DIGV)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else if (Function *F = dyn_cast(const_cast(V))){ - Value *DIF = findDbgSubprogramDeclare(F); - if (!DIF) return false; - DISubprogram Var(cast(DIF)); - - StringRef D = Var.getDisplayName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } else { - const DbgDeclareInst *DDI = findDbgDeclare(V); - if (!DDI) return false; - DIVariable Var(cast(DDI->getVariable())); - - StringRef D = Var.getName(); - if (!D.empty()) - DisplayName = D; - LineNo = Var.getLineNumber(); - Unit = Var.getCompileUnit(); - TypeD = Var.getType(); - } - - StringRef T = TypeD.getName(); - if (!T.empty()) - Type = T; - StringRef F = Unit.getFilename(); - if (!F.empty()) - File = F; - StringRef D = Unit.getDirectory(); - if (!D.empty()) - Dir = D; - return true; -} - -void PrintDbgInfo::printVariableDeclaration(const Value *V) { - std::string DisplayName, File, Directory, Type; - unsigned LineNo = 0; - - if (!getLocationInfo(V, DisplayName, Type, LineNo, File, Directory)) - return; - - Out << "; "; - WriteAsOperand(Out, V, false, 0); - if (isa(V)) - Out << " is function " << DisplayName - << " of type " << Type << " declared at "; - else - Out << " is variable " << DisplayName - << " of type " << Type << " declared at "; - - if (PrintDirectory) - Out << Directory << "/"; - - Out << File << ":" << LineNo << "\n"; -} - -bool PrintDbgInfo::runOnFunction(Function &F) { - if (F.isDeclaration()) - return false; - - Out << "function " << F.getName() << "\n\n"; - - for (Function::iterator I = F.begin(), E = F.end(); I != E; ++I) { - BasicBlock *BB = I; - - if (I != F.begin() && (pred_begin(BB) == pred_end(BB))) - // Skip dead blocks. 
- continue; - - Out << BB->getName(); - Out << ":"; - - Out << "\n"; - - for (BasicBlock::const_iterator i = BB->begin(), e = BB->end(); - i != e; ++i) { - - printVariableDeclaration(i); - - if (const User *U = dyn_cast(i)) { - for(unsigned i=0;igetNumOperands();i++) - printVariableDeclaration(U->getOperand(i)); - } - } - } - return false; -} diff --git a/lib/Analysis/DependenceAnalysis.cpp b/lib/Analysis/DependenceAnalysis.cpp index 95ac5ea233b1..cbc71bd6e739 100644 --- a/lib/Analysis/DependenceAnalysis.cpp +++ b/lib/Analysis/DependenceAnalysis.cpp @@ -55,12 +55,12 @@ #include "llvm/Analysis/DependenceAnalysis.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Operator.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/InstIterator.h" @@ -145,22 +145,20 @@ void DependenceAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { // Used to test the dependence analyzer. -// Looks through the function, noting the first store instruction -// and the first load instruction -// (which always follows the first load in our tests). -// Calls depends() and prints out the result. +// Looks through the function, noting loads and stores. +// Calls depends() on every possible pair and prints out the result. // Ignores all other instructions. static void dumpExampleDependence(raw_ostream &OS, Function *F, DependenceAnalysis *DA) { for (inst_iterator SrcI = inst_begin(F), SrcE = inst_end(F); SrcI != SrcE; ++SrcI) { - if (const StoreInst *Src = dyn_cast(&*SrcI)) { + if (isa(*SrcI) || isa(*SrcI)) { for (inst_iterator DstI = SrcI, DstE = inst_end(F); DstI != DstE; ++DstI) { - if (const LoadInst *Dst = dyn_cast(&*DstI)) { + if (isa(*DstI) || isa(*DstI)) { OS << "da analyze - "; - if (Dependence *D = DA->depends(Src, Dst, true)) { + if (Dependence *D = DA->depends(&*SrcI, &*DstI, true)) { D->dump(OS); for (unsigned Level = 1; Level <= D->getLevels(); Level++) { if (D->isSplitable(Level)) { @@ -173,7 +171,6 @@ void dumpExampleDependence(raw_ostream &OS, Function *F, } else OS << "none!\n"; - return; } } } @@ -224,8 +221,8 @@ bool Dependence::isScalar(unsigned level) const { //===----------------------------------------------------------------------===// // FullDependence methods -FullDependence::FullDependence(const Instruction *Source, - const Instruction *Destination, +FullDependence::FullDependence(Instruction *Source, + Instruction *Destination, bool PossiblyLoopIndependent, unsigned CommonLevels) : Dependence(Source, Destination), @@ -586,42 +583,40 @@ void Dependence::dump(raw_ostream &OS) const { else if (isInput()) OS << "input"; unsigned Levels = getLevels(); - if (Levels) { - OS << " ["; - for (unsigned II = 1; II <= Levels; ++II) { - if (isSplitable(II)) - Splitable = true; - if (isPeelFirst(II)) - OS << 'p'; - const SCEV *Distance = getDistance(II); - if (Distance) - OS << *Distance; - else if (isScalar(II)) - OS << "S"; + OS << " ["; + for (unsigned II = 1; II <= Levels; ++II) { + if (isSplitable(II)) + Splitable = true; + if (isPeelFirst(II)) + OS << 'p'; + const SCEV *Distance = getDistance(II); + if (Distance) + OS << *Distance; + else if (isScalar(II)) + OS << "S"; + else { + unsigned Direction = getDirection(II); + if (Direction == DVEntry::ALL) + OS << "*"; else { - 
unsigned Direction = getDirection(II); - if (Direction == DVEntry::ALL) - OS << "*"; - else { - if (Direction & DVEntry::LT) - OS << "<"; - if (Direction & DVEntry::EQ) - OS << "="; - if (Direction & DVEntry::GT) - OS << ">"; - } + if (Direction & DVEntry::LT) + OS << "<"; + if (Direction & DVEntry::EQ) + OS << "="; + if (Direction & DVEntry::GT) + OS << ">"; } - if (isPeelLast(II)) - OS << 'p'; - if (II < Levels) - OS << " "; } - if (isLoopIndependent()) - OS << "|<"; - OS << "]"; - if (Splitable) - OS << " splitable"; + if (isPeelLast(II)) + OS << 'p'; + if (II < Levels) + OS << " "; } + if (isLoopIndependent()) + OS << "|<"; + OS << "]"; + if (Splitable) + OS << " splitable"; } OS << "!\n"; } @@ -652,10 +647,10 @@ bool isLoadOrStore(const Instruction *I) { static -const Value *getPointerOperand(const Instruction *I) { - if (const LoadInst *LI = dyn_cast(I)) +Value *getPointerOperand(Instruction *I) { + if (LoadInst *LI = dyn_cast(I)) return LI->getPointerOperand(); - if (const StoreInst *SI = dyn_cast(I)) + if (StoreInst *SI = dyn_cast(I)) return SI->getPointerOperand(); llvm_unreachable("Value is not load or store instruction"); return 0; @@ -2215,13 +2210,13 @@ const SCEVConstant *getConstantPart(const SCEVMulExpr *Product) { // // It occurs to me that the presence of loop-invariant variables // changes the nature of the test from "greatest common divisor" -// to "a common divisor!" +// to "a common divisor". bool DependenceAnalysis::gcdMIVtest(const SCEV *Src, const SCEV *Dst, FullDependence &Result) const { DEBUG(dbgs() << "starting gcd\n"); ++GCDapplications; - unsigned BitWidth = Src->getType()->getIntegerBitWidth(); + unsigned BitWidth = SE->getTypeSizeInBits(Src->getType()); APInt RunningGCD = APInt::getNullValue(BitWidth); // Examine Src coefficients. @@ -3197,42 +3192,42 @@ static void dumpSmallBitVector(SmallBitVector &BV) { // Goff, Kennedy, Tseng // PLDI 1991 // -// Care is required to keep the code below up to date w.r.t. this routine. -Dependence *DependenceAnalysis::depends(const Instruction *Src, - const Instruction *Dst, +// Care is required to keep the routine below, getSplitIteration(), +// up to date with respect to this routine. +Dependence *DependenceAnalysis::depends(Instruction *Src, + Instruction *Dst, bool PossiblyLoopIndependent) { + if (Src == Dst) + PossiblyLoopIndependent = false; + if ((!Src->mayReadFromMemory() && !Src->mayWriteToMemory()) || (!Dst->mayReadFromMemory() && !Dst->mayWriteToMemory())) // if both instructions don't reference memory, there's no dependence return NULL; - if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) + if (!isLoadOrStore(Src) || !isLoadOrStore(Dst)) { // can only analyze simple loads and stores, i.e., no calls, invokes, etc. + DEBUG(dbgs() << "can only handle simple loads and stores\n"); return new Dependence(Src, Dst); + } - const Value *SrcPtr = getPointerOperand(Src); - const Value *DstPtr = getPointerOperand(Dst); + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); switch (underlyingObjectsAlias(AA, DstPtr, SrcPtr)) { case AliasAnalysis::MayAlias: case AliasAnalysis::PartialAlias: // cannot analyse objects if we don't understand their aliasing. + DEBUG(dbgs() << "can't analyze may or partial alias\n"); return new Dependence(Src, Dst); case AliasAnalysis::NoAlias: // If the objects noalias, they are distinct, accesses are independent. + DEBUG(dbgs() << "no alias\n"); return NULL; case AliasAnalysis::MustAlias: break; // The underlying objects alias; test accesses for dependence. 
} - const GEPOperator *SrcGEP = dyn_cast(SrcPtr); - const GEPOperator *DstGEP = dyn_cast(DstPtr); - if (!SrcGEP || !DstGEP) - return new Dependence(Src, Dst); // missing GEP, assume dependence - - if (SrcGEP->getPointerOperandType() != DstGEP->getPointerOperandType()) - return new Dependence(Src, Dst); // different types, assume dependence - // establish loop nesting levels establishNestingLevels(Src, Dst); DEBUG(dbgs() << " common nesting levels = " << CommonLevels << "\n"); @@ -3241,36 +3236,62 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, FullDependence Result(Src, Dst, PossiblyLoopIndependent, CommonLevels); ++TotalArrayPairs; - // classify subscript pairs - unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + // See if there are GEPs we can use. + bool UsefulGEP = false; + GEPOperator *SrcGEP = dyn_cast(SrcPtr); + GEPOperator *DstGEP = dyn_cast(DstPtr); + if (SrcGEP && DstGEP && + SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { + const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); + const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); + DEBUG(dbgs() << " SrcPtrSCEV = " << *SrcPtrSCEV << "\n"); + DEBUG(dbgs() << " DstPtrSCEV = " << *DstPtrSCEV << "\n"); + + UsefulGEP = + isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + } + unsigned Pairs = UsefulGEP ? SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector Pair(Pairs); - for (unsigned SI = 0; SI < Pairs; ++SI) { - Pair[SI].Loops.resize(MaxLevels + 1); - Pair[SI].GroupLoops.resize(MaxLevels + 1); - Pair[SI].Group.resize(Pairs); - } - Pairs = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(), - DstEnd = DstGEP->idx_end(); - SrcIdx != SrcEnd && DstIdx != DstEnd; - ++SrcIdx, ++DstIdx, ++Pairs) { - Pair[Pairs].Src = SE->getSCEV(*SrcIdx); - Pair[Pairs].Dst = SE->getSCEV(*DstIdx); - removeMatchingExtensions(&Pair[Pairs]); - Pair[Pairs].Classification = - classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), - Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), - Pair[Pairs].Loops); - Pair[Pairs].GroupLoops = Pair[Pairs].Loops; - Pair[Pairs].Group.set(Pairs); - DEBUG(dbgs() << " subscript " << Pairs << "\n"); - DEBUG(dbgs() << "\tsrc = " << *Pair[Pairs].Src << "\n"); - DEBUG(dbgs() << "\tdst = " << *Pair[Pairs].Dst << "\n"); - DEBUG(dbgs() << "\tclass = " << Pair[Pairs].Classification << "\n"); + if (UsefulGEP) { + DEBUG(dbgs() << " using GEPs\n"); + unsigned P = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(); + SrcIdx != SrcEnd; + ++SrcIdx, ++DstIdx, ++P) { + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); + } + } + else { + DEBUG(dbgs() << " ignoring GEPs\n"); + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + DEBUG(dbgs() << " SrcSCEV = " << *SrcSCEV << "\n"); + DEBUG(dbgs() << " DstSCEV = " << *DstSCEV << "\n"); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = 
Pair[P].Loops; + Pair[P].Group.set(P); + DEBUG(dbgs() << " subscript " << P << "\n"); + DEBUG(dbgs() << "\tsrc = " << *Pair[P].Src << "\n"); + DEBUG(dbgs() << "\tdst = " << *Pair[P].Dst << "\n"); + DEBUG(dbgs() << "\tclass = " << Pair[P].Classification << "\n"); DEBUG(dbgs() << "\tloops = "); - DEBUG(dumpSmallBitVector(Pair[Pairs].Loops)); + DEBUG(dumpSmallBitVector(Pair[P].Loops)); } SmallBitVector Separable(Pairs); @@ -3532,7 +3553,7 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, } } - // make sure Scalar flags are set correctly + // Make sure the Scalar flags are set correctly. SmallBitVector CompleteLoops(MaxLevels + 1); for (unsigned SI = 0; SI < Pairs; ++SI) CompleteLoops |= Pair[SI].Loops; @@ -3540,8 +3561,10 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, if (CompleteLoops[II]) Result.DV[II - 1].Scalar = false; - // make sure loopIndepent flag is set correctly if (PossiblyLoopIndependent) { + // Make sure the LoopIndependent flag is set correctly. + // All directions must include equal, otherwise no + // loop-independent dependence is possible. for (unsigned II = 1; II <= CommonLevels; ++II) { if (!(Result.getDirection(II) & Dependence::DVEntry::EQ)) { Result.LoopIndependent = false; @@ -3549,6 +3572,19 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, } } } + else { + // On the other hand, if all directions are equal and there's no + // loop-independent dependence possible, then no dependence exists. + bool AllEqual = true; + for (unsigned II = 1; II <= CommonLevels; ++II) { + if (Result.getDirection(II) != Dependence::DVEntry::EQ) { + AllEqual = false; + break; + } + } + if (AllEqual) + return NULL; + } FullDependence *Final = new FullDependence(Result); Result.DV = NULL; @@ -3565,7 +3601,8 @@ Dependence *DependenceAnalysis::depends(const Instruction *Src, // though simplified since we know that the dependence exists. // It's tedious, since we must go through all propagations, etc. // -// Care is required to keep this code up to date w.r.t. the code above. +// Care is required to keep this code up to date with respect to the routine +// above, depends(). 
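Mirroring the updated tester above, a client enumerates every load/store pair and hands each one to depends(). A sketch of that usage, assuming the DependenceAnalysis pass was obtained in the usual way and that the returned Dependence object is owned by the caller:

#include "llvm/Analysis/DependenceAnalysis.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/Instructions.h"
#include "llvm/Support/InstIterator.h"

using namespace llvm;

static void inspectDependences(Function &F, DependenceAnalysis &DA) {
  for (inst_iterator I = inst_begin(F), E = inst_end(F); I != E; ++I) {
    if (!isa<LoadInst>(*I) && !isa<StoreInst>(*I))
      continue;
    for (inst_iterator J = I, JE = inst_end(F); J != JE; ++J) {
      if (!isa<LoadInst>(*J) && !isa<StoreInst>(*J))
        continue;
      if (Dependence *D = DA.depends(&*I, &*J,
                                     /*PossiblyLoopIndependent=*/true)) {
        // Inspect D->getLevels(), D->getDirection(Level), D->isConfused()...
        delete D; // heap-allocated result, assumed caller-owned
      }
    }
  }
}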
// // Generally, the dependence analyzer will be used to build // a dependence graph for a function (basically a map from instructions @@ -3608,50 +3645,65 @@ const SCEV *DependenceAnalysis::getSplitIteration(const Dependence *Dep, assert(Dep && "expected a pointer to a Dependence"); assert(Dep->isSplitable(SplitLevel) && "Dep should be splitable at SplitLevel"); - const Instruction *Src = Dep->getSrc(); - const Instruction *Dst = Dep->getDst(); + Instruction *Src = Dep->getSrc(); + Instruction *Dst = Dep->getDst(); assert(Src->mayReadFromMemory() || Src->mayWriteToMemory()); assert(Dst->mayReadFromMemory() || Dst->mayWriteToMemory()); assert(isLoadOrStore(Src)); assert(isLoadOrStore(Dst)); - const Value *SrcPtr = getPointerOperand(Src); - const Value *DstPtr = getPointerOperand(Dst); + Value *SrcPtr = getPointerOperand(Src); + Value *DstPtr = getPointerOperand(Dst); assert(underlyingObjectsAlias(AA, DstPtr, SrcPtr) == AliasAnalysis::MustAlias); - const GEPOperator *SrcGEP = dyn_cast(SrcPtr); - const GEPOperator *DstGEP = dyn_cast(DstPtr); - assert(SrcGEP); - assert(DstGEP); - assert(SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()); // establish loop nesting levels establishNestingLevels(Src, Dst); FullDependence Result(Src, Dst, false, CommonLevels); - // classify subscript pairs - unsigned Pairs = SrcGEP->idx_end() - SrcGEP->idx_begin(); + // See if there are GEPs we can use. + bool UsefulGEP = false; + GEPOperator *SrcGEP = dyn_cast(SrcPtr); + GEPOperator *DstGEP = dyn_cast(DstPtr); + if (SrcGEP && DstGEP && + SrcGEP->getPointerOperandType() == DstGEP->getPointerOperandType()) { + const SCEV *SrcPtrSCEV = SE->getSCEV(SrcGEP->getPointerOperand()); + const SCEV *DstPtrSCEV = SE->getSCEV(DstGEP->getPointerOperand()); + UsefulGEP = + isLoopInvariant(SrcPtrSCEV, LI->getLoopFor(Src->getParent())) && + isLoopInvariant(DstPtrSCEV, LI->getLoopFor(Dst->getParent())); + } + unsigned Pairs = UsefulGEP ? 
SrcGEP->idx_end() - SrcGEP->idx_begin() : 1; SmallVector Pair(Pairs); - for (unsigned SI = 0; SI < Pairs; ++SI) { - Pair[SI].Loops.resize(MaxLevels + 1); - Pair[SI].GroupLoops.resize(MaxLevels + 1); - Pair[SI].Group.resize(Pairs); - } - Pairs = 0; - for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), - SrcEnd = SrcGEP->idx_end(), - DstIdx = DstGEP->idx_begin(), - DstEnd = DstGEP->idx_end(); - SrcIdx != SrcEnd && DstIdx != DstEnd; - ++SrcIdx, ++DstIdx, ++Pairs) { - Pair[Pairs].Src = SE->getSCEV(*SrcIdx); - Pair[Pairs].Dst = SE->getSCEV(*DstIdx); - Pair[Pairs].Classification = - classifyPair(Pair[Pairs].Src, LI->getLoopFor(Src->getParent()), - Pair[Pairs].Dst, LI->getLoopFor(Dst->getParent()), - Pair[Pairs].Loops); - Pair[Pairs].GroupLoops = Pair[Pairs].Loops; - Pair[Pairs].Group.set(Pairs); + if (UsefulGEP) { + unsigned P = 0; + for (GEPOperator::const_op_iterator SrcIdx = SrcGEP->idx_begin(), + SrcEnd = SrcGEP->idx_end(), + DstIdx = DstGEP->idx_begin(); + SrcIdx != SrcEnd; + ++SrcIdx, ++DstIdx, ++P) { + Pair[P].Src = SE->getSCEV(*SrcIdx); + Pair[P].Dst = SE->getSCEV(*DstIdx); + } + } + else { + const SCEV *SrcSCEV = SE->getSCEV(SrcPtr); + const SCEV *DstSCEV = SE->getSCEV(DstPtr); + Pair[0].Src = SrcSCEV; + Pair[0].Dst = DstSCEV; + } + + for (unsigned P = 0; P < Pairs; ++P) { + Pair[P].Loops.resize(MaxLevels + 1); + Pair[P].GroupLoops.resize(MaxLevels + 1); + Pair[P].Group.resize(Pairs); + removeMatchingExtensions(&Pair[P]); + Pair[P].Classification = + classifyPair(Pair[P].Src, LI->getLoopFor(Src->getParent()), + Pair[P].Dst, LI->getLoopFor(Dst->getParent()), + Pair[P].Loops); + Pair[P].GroupLoops = Pair[P].Loops; + Pair[P].Group.set(P); } SmallBitVector Separable(Pairs); diff --git a/lib/Analysis/DominanceFrontier.cpp b/lib/Analysis/DominanceFrontier.cpp index 3e537e9f1a36..7e4a89f1bd57 100644 --- a/lib/Analysis/DominanceFrontier.cpp +++ b/lib/Analysis/DominanceFrontier.cpp @@ -8,9 +8,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/DominanceFrontier.h" -#include "llvm/Support/Debug.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/IPA/CMakeLists.txt b/lib/Analysis/IPA/CMakeLists.txt index 34d6d1bdd421..67b413577980 100644 --- a/lib/Analysis/IPA/CMakeLists.txt +++ b/lib/Analysis/IPA/CMakeLists.txt @@ -1,9 +1,11 @@ add_llvm_library(LLVMipa CallGraph.cpp CallGraphSCCPass.cpp + CallPrinter.cpp FindUsedTypes.cpp GlobalsModRef.cpp IPA.cpp + InlineCost.cpp ) add_dependencies(LLVMipa intrinsics_gen) diff --git a/lib/Analysis/IPA/CallGraph.cpp b/lib/Analysis/IPA/CallGraph.cpp index dec0eced2786..7620fd9842cc 100644 --- a/lib/Analysis/IPA/CallGraph.cpp +++ b/lib/Analysis/IPA/CallGraph.cpp @@ -13,9 +13,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/CallGraph.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/IPA/CallGraphSCCPass.cpp b/lib/Analysis/IPA/CallGraphSCCPass.cpp index 449b7ee87b1c..a0d788f34a3c 100644 --- a/lib/Analysis/IPA/CallGraphSCCPass.cpp +++ b/lib/Analysis/IPA/CallGraphSCCPass.cpp @@ -16,13 +16,13 @@ 
//===----------------------------------------------------------------------===// #define DEBUG_TYPE "cgscc-passmgr" -#include "llvm/CallGraphSCCPass.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/PassManagers.h" -#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallGraphSCCPass.h" #include "llvm/ADT/SCCIterator.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/CallGraph.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/PassManagers.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Timer.h" @@ -51,6 +51,9 @@ public: /// whether any of the passes modifies the module, and if so, return true. bool runOnModule(Module &M); + using ModulePass::doInitialization; + using ModulePass::doFinalization; + bool doInitialization(CallGraph &CG); bool doFinalization(CallGraph &CG); diff --git a/lib/Analysis/IPA/CallPrinter.cpp b/lib/Analysis/IPA/CallPrinter.cpp new file mode 100644 index 000000000000..306ae7a4dbfb --- /dev/null +++ b/lib/Analysis/IPA/CallPrinter.cpp @@ -0,0 +1,87 @@ +//===- CallPrinter.cpp - DOT printer for call graph -----------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines '-dot-callgraph', which emit a callgraph..dot +// containing the call graph of a module. +// +// There is also a pass available to directly call dotty ('-view-callgraph'). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/CallGraph.h" +#include "llvm/Analysis/CallPrinter.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" + +using namespace llvm; + +namespace llvm { + +template<> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(CallGraph *Graph) { + return "Call graph"; + } + + std::string getNodeLabel(CallGraphNode *Node, CallGraph *Graph) { + if (Function *Func = Node->getFunction()) + return Func->getName(); + + return "external node"; + } +}; + +} // end llvm namespace + +namespace { + +struct CallGraphViewer + : public DOTGraphTraitsModuleViewer { + static char ID; + + CallGraphViewer() + : DOTGraphTraitsModuleViewer("callgraph", ID) { + initializeCallGraphViewerPass(*PassRegistry::getPassRegistry()); + } +}; + +struct CallGraphPrinter + : public DOTGraphTraitsModulePrinter { + static char ID; + + CallGraphPrinter() + : DOTGraphTraitsModulePrinter("callgraph", ID) { + initializeCallGraphPrinterPass(*PassRegistry::getPassRegistry()); + } +}; + +} // end anonymous namespace + +char CallGraphViewer::ID = 0; +INITIALIZE_PASS(CallGraphViewer, "view-callgraph", + "View call graph", + false, false) + +char CallGraphPrinter::ID = 0; +INITIALIZE_PASS(CallGraphPrinter, "dot-callgraph", + "Print call graph to 'dot' file", + false, false) + +// Create methods available outside of this file, to use them +// "include/llvm/LinkAllPasses.h". Otherwise the pass would be deleted by +// the link time optimization. 
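A sketch of driving the new printer from C++ through the create* entry points defined just below; it assumes the usual pass initialization performed by tools such as opt has already happened. From the command line the same passes are exposed as -dot-callgraph and -view-callgraph:

#include "llvm/Analysis/CallPrinter.h"
#include "llvm/IR/Module.h"
#include "llvm/PassManager.h"

using namespace llvm;

// Schedule the DOT printer over a module; the required CallGraph analysis is
// pulled in automatically by the pass manager.
static void emitCallGraphDot(Module &M) {
  PassManager PM;
  PM.add(createCallGraphPrinterPass());
  PM.run(M);
}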
+ +ModulePass *llvm::createCallGraphViewerPass() { + return new CallGraphViewer(); +} + +ModulePass *llvm::createCallGraphPrinterPass() { + return new CallGraphPrinter(); +} diff --git a/lib/Analysis/IPA/FindUsedTypes.cpp b/lib/Analysis/IPA/FindUsedTypes.cpp index e9df3ca01022..1c4f17d3819a 100644 --- a/lib/Analysis/IPA/FindUsedTypes.cpp +++ b/lib/Analysis/IPA/FindUsedTypes.cpp @@ -14,10 +14,10 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/FindUsedTypes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/IPA/GlobalsModRef.cpp b/lib/Analysis/IPA/GlobalsModRef.cpp index 990caa80c8d2..92d0d2318e0d 100644 --- a/lib/Analysis/IPA/GlobalsModRef.cpp +++ b/lib/Analysis/IPA/GlobalsModRef.cpp @@ -16,20 +16,20 @@ #define DEBUG_TYPE "globalsmodref-aa" #include "llvm/Analysis/Passes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" -#include "llvm/Instructions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/SCCIterator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/CallGraph.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/InstIterator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SCCIterator.h" #include using namespace llvm; diff --git a/lib/Analysis/IPA/IPA.cpp b/lib/Analysis/IPA/IPA.cpp index 0ba2e04c6302..aa5164e9e79b 100644 --- a/lib/Analysis/IPA/IPA.cpp +++ b/lib/Analysis/IPA/IPA.cpp @@ -20,6 +20,8 @@ using namespace llvm; void llvm::initializeIPA(PassRegistry &Registry) { initializeBasicCallGraphPass(Registry); initializeCallGraphAnalysisGroup(Registry); + initializeCallGraphPrinterPass(Registry); + initializeCallGraphViewerPass(Registry); initializeFindUsedTypesPass(Registry); initializeGlobalsModRefPass(Registry); } diff --git a/lib/Analysis/IPA/InlineCost.cpp b/lib/Analysis/IPA/InlineCost.cpp new file mode 100644 index 000000000000..35c45e61808b --- /dev/null +++ b/lib/Analysis/IPA/InlineCost.cpp @@ -0,0 +1,1239 @@ +//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements inline cost analysis. 
+// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "inline-cost" +#include "llvm/Analysis/InlineCost.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/InstVisitor.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); + +namespace { + +class CallAnalyzer : public InstVisitor { + typedef InstVisitor Base; + friend class InstVisitor; + + // DataLayout if available, or null. + const DataLayout *const TD; + + /// The TargetTransformInfo available for this compilation. + const TargetTransformInfo &TTI; + + // The called function. + Function &F; + + int Threshold; + int Cost; + + bool IsCallerRecursive; + bool IsRecursiveCall; + bool ExposesReturnsTwice; + bool HasDynamicAlloca; + bool ContainsNoDuplicateCall; + + /// Number of bytes allocated statically by the callee. + uint64_t AllocatedSize; + unsigned NumInstructions, NumVectorInstructions; + int FiftyPercentVectorBonus, TenPercentVectorBonus; + int VectorBonus; + + // While we walk the potentially-inlined instructions, we build up and + // maintain a mapping of simplified values specific to this callsite. The + // idea is to propagate any special information we have about arguments to + // this call through the inlinable section of the function, and account for + // likely simplifications post-inlining. The most important aspect we track + // is CFG altering simplifications -- when we prove a basic block dead, that + // can cause dramatic shifts in the cost of inlining a function. + DenseMap SimplifiedValues; + + // Keep track of the values which map back (through function arguments) to + // allocas on the caller stack which could be simplified through SROA. + DenseMap SROAArgValues; + + // The mapping of caller Alloca values to their accumulated cost savings. If + // we have to disable SROA for one of the allocas, this tells us how much + // cost must be added. + DenseMap SROAArgCosts; + + // Keep track of values which map to a pointer base and constant offset. + DenseMap > ConstantOffsetPtrs; + + // Custom simplification helper routines. + bool isAllocaDerivedArg(Value *V); + bool lookupSROAArgAndCost(Value *V, Value *&Arg, + DenseMap::iterator &CostIt); + void disableSROA(DenseMap::iterator CostIt); + void disableSROA(Value *V); + void accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost); + bool handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost); + bool isGEPOffsetConstant(GetElementPtrInst &GEP); + bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); + bool simplifyCallSite(Function *F, CallSite CS); + ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); + + // Custom analysis routines. + bool analyzeBlock(BasicBlock *BB); + + // Disable several entry points to the visitor so we don't accidentally use + // them by declaring but not defining them here. 
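A stripped-down sketch of the InstVisitor pattern CallAnalyzer builds on: a bool-returning visitor with a catch-all visitInstruction and targeted per-opcode overrides. ToyCostVisitor and its members are illustrative names, not part of this patch:

#include "llvm/IR/Instructions.h"
#include "llvm/InstVisitor.h"

using namespace llvm;

class ToyCostVisitor : public InstVisitor<ToyCostVisitor, bool> {
  friend class InstVisitor<ToyCostVisitor, bool>;
  unsigned Cost;

  // Fallback: anything we do not special-case is paid for.
  bool visitInstruction(Instruction &) { ++Cost; return false; }
  // Bitcasts are modeled as free.
  bool visitBitCast(BitCastInst &) { return true; }

public:
  ToyCostVisitor() : Cost(0) {}

  unsigned run(BasicBlock &BB) {
    for (BasicBlock::iterator I = BB.begin(), E = BB.end(); I != E; ++I)
      visit(*I); // dispatches to the most specific visit* overload
    return Cost;
  }
};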
+ void visit(Module *); void visit(Module &); + void visit(Function *); void visit(Function &); + void visit(BasicBlock *); void visit(BasicBlock &); + + // Provide base case for our instruction visit. + bool visitInstruction(Instruction &I); + + // Our visit overrides. + bool visitAlloca(AllocaInst &I); + bool visitPHI(PHINode &I); + bool visitGetElementPtr(GetElementPtrInst &I); + bool visitBitCast(BitCastInst &I); + bool visitPtrToInt(PtrToIntInst &I); + bool visitIntToPtr(IntToPtrInst &I); + bool visitCastInst(CastInst &I); + bool visitUnaryInstruction(UnaryInstruction &I); + bool visitICmp(ICmpInst &I); + bool visitSub(BinaryOperator &I); + bool visitBinaryOperator(BinaryOperator &I); + bool visitLoad(LoadInst &I); + bool visitStore(StoreInst &I); + bool visitExtractValue(ExtractValueInst &I); + bool visitInsertValue(InsertValueInst &I); + bool visitCallSite(CallSite CS); + +public: + CallAnalyzer(const DataLayout *TD, const TargetTransformInfo &TTI, + Function &Callee, int Threshold) + : TD(TD), TTI(TTI), F(Callee), Threshold(Threshold), Cost(0), + IsCallerRecursive(false), IsRecursiveCall(false), + ExposesReturnsTwice(false), HasDynamicAlloca(false), + ContainsNoDuplicateCall(false), AllocatedSize(0), NumInstructions(0), + NumVectorInstructions(0), FiftyPercentVectorBonus(0), + TenPercentVectorBonus(0), VectorBonus(0), NumConstantArgs(0), + NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), NumConstantPtrCmps(0), + NumConstantPtrDiffs(0), NumInstructionsSimplified(0), + SROACostSavings(0), SROACostSavingsLost(0) {} + + bool analyzeCall(CallSite CS); + + int getThreshold() { return Threshold; } + int getCost() { return Cost; } + + // Keep a bunch of stats about the cost savings found so we can print them + // out when debugging. + unsigned NumConstantArgs; + unsigned NumConstantOffsetPtrArgs; + unsigned NumAllocaArgs; + unsigned NumConstantPtrCmps; + unsigned NumConstantPtrDiffs; + unsigned NumInstructionsSimplified; + unsigned SROACostSavings; + unsigned SROACostSavingsLost; + + void dump(); +}; + +} // namespace + +/// \brief Test whether the given value is an Alloca-derived function argument. +bool CallAnalyzer::isAllocaDerivedArg(Value *V) { + return SROAArgValues.count(V); +} + +/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. +/// Returns false if V does not map to a SROA-candidate. +bool CallAnalyzer::lookupSROAArgAndCost( + Value *V, Value *&Arg, DenseMap::iterator &CostIt) { + if (SROAArgValues.empty() || SROAArgCosts.empty()) + return false; + + DenseMap::iterator ArgIt = SROAArgValues.find(V); + if (ArgIt == SROAArgValues.end()) + return false; + + Arg = ArgIt->second; + CostIt = SROAArgCosts.find(Arg); + return CostIt != SROAArgCosts.end(); +} + +/// \brief Disable SROA for the candidate marked by this cost iterator. +/// +/// This marks the candidate as no longer viable for SROA, and adds the cost +/// savings associated with it back into the inline cost measurement. +void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { + // If we're no longer able to perform SROA we need to undo its cost savings + // and prevent subsequent analysis. + Cost += CostIt->second; + SROACostSavings -= CostIt->second; + SROACostSavingsLost += CostIt->second; + SROAArgCosts.erase(CostIt); +} + +/// \brief If 'V' maps to a SROA candidate, disable SROA for it. 
+void CallAnalyzer::disableSROA(Value *V) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(V, SROAArg, CostIt)) + disableSROA(CostIt); +} + +/// \brief Accumulate the given cost for a particular SROA candidate. +void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, + int InstructionCost) { + CostIt->second += InstructionCost; + SROACostSavings += InstructionCost; +} + +/// \brief Helper for the common pattern of handling a SROA candidate. +/// Either accumulates the cost savings if the SROA remains valid, or disables +/// SROA for the candidate. +bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, + DenseMap::iterator CostIt, + int InstructionCost) { + if (IsSROAValid) { + accumulateSROACost(CostIt, InstructionCost); + return true; + } + + disableSROA(CostIt); + return false; +} + +/// \brief Check whether a GEP's indices are all constant. +/// +/// Respects any simplified values known during the analysis of this callsite. +bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { + for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) + if (!isa(*I) && !SimplifiedValues.lookup(*I)) + return false; + + return true; +} + +/// \brief Accumulate a constant GEP offset into an APInt if possible. +/// +/// Returns false if unable to compute the offset for any reason. Respects any +/// simplified values known during the analysis of this callsite. +bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { + if (!TD) + return false; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + assert(IntPtrWidth == Offset.getBitWidth()); + + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + ConstantInt *OpC = dyn_cast(GTI.getOperand()); + if (!OpC) + if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) + OpC = dyn_cast(SimpleOp); + if (!OpC) + return false; + if (OpC->isZero()) continue; + + // Handle a struct index, which adds its field offset to the pointer. + if (StructType *STy = dyn_cast(*GTI)) { + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = TD->getStructLayout(STy); + Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); + continue; + } + + APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); + Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; + } + return true; +} + +bool CallAnalyzer::visitAlloca(AllocaInst &I) { + // FIXME: Check whether inlining will turn a dynamic alloca into a static + // alloca, and handle that case. + + // Accumulate the allocated size. + if (I.isStaticAlloca()) { + Type *Ty = I.getAllocatedType(); + AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : + Ty->getPrimitiveSizeInBits()); + } + + // We will happily inline static alloca instructions. + if (I.isStaticAlloca()) + return Base::visitAlloca(I); + + // FIXME: This is overly conservative. Dynamic allocas are inefficient for + // a variety of reasons, and so we would like to not inline them into + // functions which don't currently have a dynamic alloca. This simply + // disables inlining altogether in the presence of a dynamic alloca. + HasDynamicAlloca = true; + return false; +} + +bool CallAnalyzer::visitPHI(PHINode &I) { + // FIXME: We should potentially be tracking values through phi nodes, + // especially when they collapse to a single value due to deleted CFG edges + // during inlining. 
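
Before the phi-node handling continues, a quick illustration of the offset folding performed by accumulateGEPOffset above: struct indices add the field's precomputed layout offset, while array or pointer indices add index times the element's allocation size. The standalone sketch below mirrors that arithmetic with fixed-width integers standing in for APInt; the layout numbers in main are made up for illustration.

#include <cstdint>
#include <cstdio>
#include <vector>

// One GEP index step: either a struct field (use the field's layout offset)
// or an array/pointer index (scale by the element's allocation size).
struct GepStep {
  bool IsStructField;
  uint64_t FieldOffset;  // byte offset of the field, if IsStructField
  int64_t Index;         // array index, if !IsStructField
  uint64_t ElementSize;  // allocation size of the indexed element type
};

// Accumulate a constant byte offset for a fully-constant GEP, mirroring the
// loop in CallAnalyzer::accumulateGEPOffset (APInt replaced by int64_t).
int64_t accumulateConstantGEPOffset(const std::vector<GepStep> &Steps) {
  int64_t Offset = 0;
  for (const GepStep &S : Steps)
    Offset += S.IsStructField ? (int64_t)S.FieldOffset
                              : S.Index * (int64_t)S.ElementSize;
  return Offset;
}

int main() {
  // Hypothetical: &p[2].second where the struct is {i32, i32} (field 1 at
  // byte 4) and the struct's allocation size is 8 bytes: 2*8 + 4 = 20.
  std::vector<GepStep> Steps = {{false, 0, 2, 8}, {true, 4, 0, 0}};
  std::printf("offset = %lld\n",
              (long long)accumulateConstantGEPOffset(Steps));
}
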
+ + // FIXME: We need to propagate SROA *disabling* through phi nodes, even + // though we don't want to propagate it's bonuses. The idea is to disable + // SROA if it *might* be used in an inappropriate manner. + + // Phi nodes are always zero-cost. + return true; +} + +bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), + SROAArg, CostIt); + + // Try to fold GEPs of constant-offset call site argument pointers. This + // requires target data and inbounds GEPs. + if (TD && I.isInBounds()) { + // Check if we have a base + offset for the pointer. + Value *Ptr = I.getPointerOperand(); + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); + if (BaseAndOffset.first) { + // Check if the offset of this GEP is constant, and if so accumulate it + // into Offset. + if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { + // Non-constant GEPs aren't folded, and disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; + } + + // Add the result as a new mapping to Base + Offset. + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also handle SROA candidates here, we already know that the GEP is + // all-constant indexed. + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + return true; + } + } + + if (isGEPOffsetConstant(I)) { + if (SROACandidate) + SROAArgValues[&I] = SROAArg; + + // Constant GEPs are modeled as free. + return true; + } + + // Variable GEPs will require math and will disable SROA. + if (SROACandidate) + disableSROA(CostIt); + return false; +} + +bool CallAnalyzer::visitBitCast(BitCastInst &I) { + // Propagate constants through bitcasts. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offsets through casts + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + // Casts don't change the offset, just wrap it up. + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + + // Also look for SROA candidates here. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + // Bitcasts are always zero cost. + return true; +} + +bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when converted to a plain integer provided the + // integer is large enough to represent the pointer. + unsigned IntegerSize = I.getType()->getScalarSizeInBits(); + if (TD && IntegerSize >= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset + = ConstantOffsetPtrs.lookup(I.getOperand(0)); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // This is really weird. Technically, ptrtoint will disable SROA. However, + // unless that ptrtoint is *used* somewhere in the live basic blocks after + // inlining, it will be nuked, and SROA should proceed. All of the uses which + // would block SROA would also block SROA if applied directly to a pointer, + // and so we can just add the integer in here. 
The only places where SROA is + // preserved either cannot fire on an integer, or won't in-and-of themselves + // disable SROA (ext) w/o some later use that we would see and disable. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Track base/offset pairs when round-tripped through a pointer without + // modifications provided the integer is not too large. + Value *Op = I.getOperand(0); + unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); + if (TD && IntegerSize <= TD->getPointerSizeInBits()) { + std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); + if (BaseAndOffset.first) + ConstantOffsetPtrs[&I] = BaseAndOffset; + } + + // "Propagate" SROA here in the same manner as we do for ptrtoint above. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) + SROAArgValues[&I] = SROAArg; + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitCastInst(CastInst &I) { + // Propagate constants through ptrtoint. + Constant *COp = dyn_cast(I.getOperand(0)); + if (!COp) + COp = SimplifiedValues.lookup(I.getOperand(0)); + if (COp) + if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. + disableSROA(I.getOperand(0)); + + return TargetTransformInfo::TCC_Free == TTI.getUserCost(&I); +} + +bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { + Value *Operand = I.getOperand(0); + Constant *COp = dyn_cast(Operand); + if (!COp) + COp = SimplifiedValues.lookup(Operand); + if (COp) + if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), + COp, TD)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on the argument to arbitrary unary operators. + disableSROA(Operand); + + return false; +} + +bool CallAnalyzer::visitICmp(ICmpInst &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + // First try to handle simplified comparisons. + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + if (Constant *CLHS = dyn_cast(LHS)) + if (Constant *CRHS = dyn_cast(RHS)) + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + return true; + } + + // Otherwise look for a comparison between constant offset pointers with + // a common base. + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the icmp to a constant based on the + // offsets. 
+ Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrCmps; + return true; + } + } + } + + // If the comparison is an equality comparison with null, we can simplify it + // for any alloca-derived argument. + if (I.isEquality() && isa(I.getOperand(1))) + if (isAllocaDerivedArg(I.getOperand(0))) { + // We can actually predict the result of comparisons between an + // alloca-derived value and null. Note that this fires regardless of + // SROA firing. + bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; + SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) + : ConstantInt::getFalse(I.getType()); + return true; + } + + // Finally check for SROA candidates in comparisons. + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (isa(I.getOperand(1))) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitSub(BinaryOperator &I) { + // Try to handle a special case: we can fold computing the difference of two + // constant-related pointers. + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + Value *LHSBase, *RHSBase; + APInt LHSOffset, RHSOffset; + llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); + if (LHSBase) { + llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); + if (RHSBase && LHSBase == RHSBase) { + // We have common bases, fold the subtract to a constant based on the + // offsets. + Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); + Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); + if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { + SimplifiedValues[&I] = C; + ++NumConstantPtrDiffs; + return true; + } + } + } + + // Otherwise, fall back to the generic logic for simplifying and handling + // instructions. + return Base::visitSub(I); +} + +bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { + Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); + if (!isa(LHS)) + if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) + LHS = SimpleLHS; + if (!isa(RHS)) + if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) + RHS = SimpleRHS; + Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); + if (Constant *C = dyn_cast_or_null(SimpleV)) { + SimplifiedValues[&I] = C; + return true; + } + + // Disable any SROA on arguments to arbitrary, unsimplified binary operators. + disableSROA(LHS); + disableSROA(RHS); + + return false; +} + +bool CallAnalyzer::visitLoad(LoadInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitStore(StoreInst &I) { + Value *SROAArg; + DenseMap::iterator CostIt; + if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { + if (I.isSimple()) { + accumulateSROACost(CostIt, InlineConstants::InstrCost); + return true; + } + + disableSROA(CostIt); + } + + return false; +} + +bool CallAnalyzer::visitExtractValue(ExtractValueInst &I) { + // Constant folding for extract value is trivial. 
+ Constant *C = dyn_cast(I.getAggregateOperand()); + if (!C) + C = SimplifiedValues.lookup(I.getAggregateOperand()); + if (C) { + SimplifiedValues[&I] = ConstantExpr::getExtractValue(C, I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +bool CallAnalyzer::visitInsertValue(InsertValueInst &I) { + // Constant folding for insert value is trivial. + Constant *AggC = dyn_cast(I.getAggregateOperand()); + if (!AggC) + AggC = SimplifiedValues.lookup(I.getAggregateOperand()); + Constant *InsertedC = dyn_cast(I.getInsertedValueOperand()); + if (!InsertedC) + InsertedC = SimplifiedValues.lookup(I.getInsertedValueOperand()); + if (AggC && InsertedC) { + SimplifiedValues[&I] = ConstantExpr::getInsertValue(AggC, InsertedC, + I.getIndices()); + return true; + } + + // SROA can look through these but give them a cost. + return false; +} + +/// \brief Try to simplify a call site. +/// +/// Takes a concrete function and callsite and tries to actually simplify it by +/// analyzing the arguments and call itself with instsimplify. Returns true if +/// it has simplified the callsite to some other entity (a constant), making it +/// free. +bool CallAnalyzer::simplifyCallSite(Function *F, CallSite CS) { + // FIXME: Using the instsimplify logic directly for this is inefficient + // because we have to continually rebuild the argument list even when no + // simplifications can be performed. Until that is fixed with remapping + // inside of instsimplify, directly constant fold calls here. + if (!canConstantFoldCallTo(F)) + return false; + + // Try to re-map the arguments to constants. + SmallVector ConstantArgs; + ConstantArgs.reserve(CS.arg_size()); + for (CallSite::arg_iterator I = CS.arg_begin(), E = CS.arg_end(); + I != E; ++I) { + Constant *C = dyn_cast(*I); + if (!C) + C = dyn_cast_or_null(SimplifiedValues.lookup(*I)); + if (!C) + return false; // This argument doesn't map to a constant. + + ConstantArgs.push_back(C); + } + if (Constant *C = ConstantFoldCall(F, ConstantArgs)) { + SimplifiedValues[CS.getInstruction()] = C; + return true; + } + + return false; +} + +bool CallAnalyzer::visitCallSite(CallSite CS) { + if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && + !F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice)) { + // This aborts the entire analysis. + ExposesReturnsTwice = true; + return false; + } + if (CS.isCall() && + cast(CS.getInstruction())->hasFnAttr(Attribute::NoDuplicate)) + ContainsNoDuplicateCall = true; + + if (Function *F = CS.getCalledFunction()) { + // When we have a concrete function, first try to simplify it directly. + if (simplifyCallSite(F, CS)) + return true; + + // Next check if it is an intrinsic we know about. + // FIXME: Lift this into part of the InstVisitor. + if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { + switch (II->getIntrinsicID()) { + default: + return Base::visitCallSite(CS); + + case Intrinsic::memset: + case Intrinsic::memcpy: + case Intrinsic::memmove: + // SROA can usually chew through these intrinsics, but they aren't free. + return false; + } + } + + if (F == CS.getInstruction()->getParent()->getParent()) { + // This flag will fully abort the analysis, so don't bother with anything + // else. + IsRecursiveCall = true; + return false; + } + + if (TTI.isLoweredToCall(F)) { + // We account for the average 1 instruction per call argument setup + // here. 
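
Before the per-argument setup cost is charged below, it is worth spelling out what simplifyCallSite above requires: every argument must either already be a constant or have been simplified to one for this call site, and only then is the call itself constant folded. In isolation that argument walk is a lookup-with-fallback over the simplified-values map; the standalone sketch below shows the pattern with hypothetical names and plain integers standing in for constants.

#include <cstdio>
#include <optional>
#include <string>
#include <unordered_map>
#include <vector>

// Stand-ins: a "value" is a name, a "constant" is an integer known for it.
using SimplifiedMap = std::unordered_map<std::string, long>;

// Mirror the argument walk in CallAnalyzer::simplifyCallSite: each argument
// must either be a literal constant already or map to one via the
// per-callsite simplifications; otherwise folding the call is impossible.
std::optional<std::vector<long>>
mapArgsToConstants(const std::vector<std::string> &Args,
                   const std::unordered_map<std::string, long> &Literals,
                   const SimplifiedMap &Simplified) {
  std::vector<long> Out;
  Out.reserve(Args.size());
  for (const std::string &A : Args) {
    if (auto It = Literals.find(A); It != Literals.end())
      Out.push_back(It->second);
    else if (auto It = Simplified.find(A); It != Simplified.end())
      Out.push_back(It->second);
    else
      return std::nullopt;  // one non-constant argument blocks folding
  }
  return Out;
}

int main() {
  // Hypothetical call foo(x, 7) where x was simplified to 3 elsewhere.
  auto Folded = mapArgsToConstants({"x", "7"}, {{"7", 7}}, {{"x", 3}});
  std::printf(Folded ? "foldable\n" : "not foldable\n");
}
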
+ Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Everything other than inline ASM will also have a significant cost + // merely from making the call. + if (!isa(CS.getCalledValue())) + Cost += InlineConstants::CallPenalty; + } + + return Base::visitCallSite(CS); + } + + // Otherwise we're in a very special case -- an indirect function call. See + // if we can be particularly clever about this. + Value *Callee = CS.getCalledValue(); + + // First, pay the price of the argument setup. We account for the average + // 1 instruction per call argument setup here. + Cost += CS.arg_size() * InlineConstants::InstrCost; + + // Next, check if this happens to be an indirect function call to a known + // function in this inline context. If not, we've done all we can. + Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); + if (!F) + return Base::visitCallSite(CS); + + // If we have a constant that we are calling as a function, we can peer + // through it and see the function target. This happens not infrequently + // during devirtualization and so we want to give it a hefty bonus for + // inlining, but cap that bonus in the event that inlining wouldn't pan + // out. Pretend to inline the function, with a custom threshold. + CallAnalyzer CA(TD, TTI, *F, InlineConstants::IndirectCallThreshold); + if (CA.analyzeCall(CS)) { + // We were able to inline the indirect call! Subtract the cost from the + // bonus we want to apply, but don't go below zero. + Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + } + + return Base::visitCallSite(CS); +} + +bool CallAnalyzer::visitInstruction(Instruction &I) { + // Some instructions are free. All of the free intrinsics can also be + // handled by SROA, etc. + if (TargetTransformInfo::TCC_Free == TTI.getUserCost(&I)) + return true; + + // We found something we don't understand or can't handle. Mark any SROA-able + // values in the operand list as no longer viable. + for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) + disableSROA(*OI); + + return false; +} + + +/// \brief Analyze a basic block for its contribution to the inline cost. +/// +/// This method walks the analyzer over every instruction in the given basic +/// block and accounts for their cost during inlining at this callsite. It +/// aborts early if the threshold has been exceeded or an impossible to inline +/// construct has been detected. It returns false if inlining is no longer +/// viable, and true if inlining remains viable. +bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { + for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); + I != E; ++I) { + ++NumInstructions; + if (isa(I) || I->getType()->isVectorTy()) + ++NumVectorInstructions; + + // If the instruction simplified to a constant, there is no cost to this + // instruction. Visit the instructions using our InstVisitor to account for + // all of the per-instruction logic. The visit tree returns true if we + // consumed the instruction in any way, and false if the instruction's base + // cost should count against inlining. + if (Base::visit(I)) + ++NumInstructionsSimplified; + else + Cost += InlineConstants::InstrCost; + + // If the visit this instruction detected an uninlinable pattern, abort. 
+ if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + + if (NumVectorInstructions > NumInstructions/2) + VectorBonus = FiftyPercentVectorBonus; + else if (NumVectorInstructions > NumInstructions/10) + VectorBonus = TenPercentVectorBonus; + else + VectorBonus = 0; + + // Check if we've past the threshold so we don't spin in huge basic + // blocks that will never inline. + if (Cost > (Threshold + VectorBonus)) + return false; + } + + return true; +} + +/// \brief Compute the base pointer and cumulative constant offsets for V. +/// +/// This strips all constant offsets off of V, leaving it the base pointer, and +/// accumulates the total constant offset applied in the returned constant. It +/// returns 0 if V is not a pointer, and returns the constant '0' if there are +/// no constant offsets applied. +ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { + if (!TD || !V->getType()->isPointerTy()) + return 0; + + unsigned IntPtrWidth = TD->getPointerSizeInBits(); + APInt Offset = APInt::getNullValue(IntPtrWidth); + + // Even though we don't look through PHI nodes, we could be called on an + // instruction in an unreachable block, which may be on a cycle. + SmallPtrSet Visited; + Visited.insert(V); + do { + if (GEPOperator *GEP = dyn_cast(V)) { + if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) + return 0; + V = GEP->getPointerOperand(); + } else if (Operator::getOpcode(V) == Instruction::BitCast) { + V = cast(V)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(V)) { + if (GA->mayBeOverridden()) + break; + V = GA->getAliasee(); + } else { + break; + } + assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + } while (Visited.insert(V)); + + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + return cast(ConstantInt::get(IntPtrTy, Offset)); +} + +/// \brief Analyze a call site for potential inlining. +/// +/// Returns true if inlining this call is viable, and false if it is not +/// viable. It computes the cost and adjusts the threshold based on numerous +/// factors and heuristics. If this method returns false but the computed cost +/// is below the computed threshold, then inlining was forcibly disabled by +/// some artifact of the routine. +bool CallAnalyzer::analyzeCall(CallSite CS) { + ++NumCallsAnalyzed; + + // Track whether the post-inlining function would have more than one basic + // block. A single basic block is often intended for inlining. Balloon the + // threshold by 50% until we pass the single-BB phase. + bool SingleBB = true; + int SingleBBBonus = Threshold / 2; + Threshold += SingleBBBonus; + + // Perform some tweaks to the cost and threshold based on the direct + // callsite information. + + // We want to more aggressively inline vector-dense kernels, so up the + // threshold, and we'll lower it if the % of vector instructions gets too + // low. + assert(NumInstructions == 0); + assert(NumVectorInstructions == 0); + FiftyPercentVectorBonus = Threshold; + TenPercentVectorBonus = Threshold / 2; + + // Give out bonuses per argument, as the instructions setting them up will + // be gone after inlining. 
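
Before the per-argument bonuses are handed out below, the bonus arithmetic just set up is easy to follow with concrete numbers. With a hypothetical incoming threshold of 200 (the real value is supplied by the inliner), the single-BB bonus is 100, the working threshold becomes 300, and the 50% and 10% vector bonuses are 300 and 150 respectively. A tiny sketch of that arithmetic:

#include <cstdio>

int main() {
  // Hypothetical incoming threshold; the real value comes from the inliner.
  int Threshold = 200;

  // Balloon the threshold by 50% while the callee still looks single-block.
  int SingleBBBonus = Threshold / 2;  // 100
  Threshold += SingleBBBonus;         // 300

  // Vector-density bonuses are derived from the (already raised) threshold.
  int FiftyPercentVectorBonus = Threshold;    // 300
  int TenPercentVectorBonus = Threshold / 2;  // 150

  std::printf("threshold=%d singleBB=%d fifty=%d ten=%d\n",
              Threshold, SingleBBBonus, FiftyPercentVectorBonus,
              TenPercentVectorBonus);
}
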
+ for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { + if (TD && CS.isByValArgument(I)) { + // We approximate the number of loads and stores needed by dividing the + // size of the byval type by the target's pointer size. + PointerType *PTy = cast(CS.getArgument(I)->getType()); + unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); + unsigned PointerSize = TD->getPointerSizeInBits(); + // Ceiling division. + unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; + + // If it generates more than 8 stores it is likely to be expanded as an + // inline memcpy so we take that as an upper bound. Otherwise we assume + // one load and one store per word copied. + // FIXME: The maxStoresPerMemcpy setting from the target should be used + // here instead of a magic number of 8, but it's not available via + // DataLayout. + NumStores = std::min(NumStores, 8U); + + Cost -= 2 * NumStores * InlineConstants::InstrCost; + } else { + // For non-byval arguments subtract off one instruction per call + // argument. + Cost -= InlineConstants::InstrCost; + } + } + + // If there is only one call of the function, and it has internal linkage, + // the cost of inlining it drops dramatically. + bool OnlyOneCallAndLocalLinkage = F.hasLocalLinkage() && F.hasOneUse() && + &F == CS.getCalledFunction(); + if (OnlyOneCallAndLocalLinkage) + Cost += InlineConstants::LastCallToStaticBonus; + + // If the instruction after the call, or if the normal destination of the + // invoke is an unreachable instruction, the function is noreturn. As such, + // there is little point in inlining this unless there is literally zero + // cost. + Instruction *Instr = CS.getInstruction(); + if (InvokeInst *II = dyn_cast(Instr)) { + if (isa(II->getNormalDest()->begin())) + Threshold = 1; + } else if (isa(++BasicBlock::iterator(Instr))) + Threshold = 1; + + // If this function uses the coldcc calling convention, prefer not to inline + // it. + if (F.getCallingConv() == CallingConv::Cold) + Cost += InlineConstants::ColdccPenalty; + + // Check if we're done. This can happen due to bonuses and penalties. + if (Cost > Threshold) + return false; + + if (F.empty()) + return true; + + Function *Caller = CS.getInstruction()->getParent()->getParent(); + // Check if the caller function is recursive itself. + for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); + U != E; ++U) { + CallSite Site(cast(*U)); + if (!Site) + continue; + Instruction *I = Site.getInstruction(); + if (I->getParent()->getParent() == Caller) { + IsCallerRecursive = true; + break; + } + } + + // Track whether we've seen a return instruction. The first return + // instruction is free, as at least one will usually disappear in inlining. + bool HasReturn = false; + + // Populate our simplified values by mapping from function arguments to call + // arguments with known important simplifications. + CallSite::arg_iterator CAI = CS.arg_begin(); + for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); + FAI != FAE; ++FAI, ++CAI) { + assert(CAI != CS.arg_end()); + if (Constant *C = dyn_cast(CAI)) + SimplifiedValues[FAI] = C; + + Value *PtrArg = *CAI; + if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { + ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); + + // We can SROA any pointer arguments derived from alloca instructions. 
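
Stepping back to the byval credit computed at the top of this loop: it is ceiling division of the aggregate's bit size by the pointer width, capped at 8 word copies, with one load and one store charged per word. For example, a 48-byte struct on a 64-bit target is 384 bits, (384 + 63) / 64 = 6 words, so the credit is 2 * 6 * InstrCost. The standalone sketch below mirrors that computation; the InstrCost value used in main is illustrative only. The alloca-derived SROA candidates are then recorded below.

#include <algorithm>
#include <cstdio>

// Mirror of the byval credit in analyzeCall: assume one load and one store
// per pointer-sized word copied, capped at 8 words (larger copies are likely
// lowered to an inline memcpy anyway).
int byvalCostCredit(unsigned TypeSizeBits, unsigned PointerSizeBits,
                    int InstrCost) {
  unsigned NumStores = (TypeSizeBits + PointerSizeBits - 1) / PointerSizeBits;
  NumStores = std::min(NumStores, 8U);
  return 2 * static_cast<int>(NumStores) * InstrCost;
}

int main() {
  // Hypothetical: 48-byte (384-bit) struct, 64-bit pointers, InstrCost of 5.
  std::printf("credit = %d\n", byvalCostCredit(384, 64, 5));  // 2*6*5 = 60
}
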
+ if (isa(PtrArg)) { + SROAArgValues[FAI] = PtrArg; + SROAArgCosts[PtrArg] = 0; + } + } + } + NumConstantArgs = SimplifiedValues.size(); + NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); + NumAllocaArgs = SROAArgValues.size(); + + // The worklist of live basic blocks in the callee *after* inlining. We avoid + // adding basic blocks of the callee which can be proven to be dead for this + // particular call site in order to get more accurate cost estimates. This + // requires a somewhat heavyweight iteration pattern: we need to walk the + // basic blocks in a breadth-first order as we insert live successors. To + // accomplish this, prioritizing for small iterations because we exit after + // crossing our threshold, we use a small-size optimized SetVector. + typedef SetVector, + SmallPtrSet > BBSetVector; + BBSetVector BBWorklist; + BBWorklist.insert(&F.getEntryBlock()); + // Note that we *must not* cache the size, this loop grows the worklist. + for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { + // Bail out the moment we cross the threshold. This means we'll under-count + // the cost, but only when undercounting doesn't matter. + if (Cost > (Threshold + VectorBonus)) + break; + + BasicBlock *BB = BBWorklist[Idx]; + if (BB->empty()) + continue; + + // Handle the terminator cost here where we can track returns and other + // function-wide constructs. + TerminatorInst *TI = BB->getTerminator(); + + // We never want to inline functions that contain an indirectbr. This is + // incorrect because all the blockaddress's (in static global initializers + // for example) would be referring to the original function, and this + // indirect jump would jump from the inlined copy of the function into the + // original function which is extremely undefined behavior. + // FIXME: This logic isn't really right; we can safely inline functions + // with indirectbr's as long as no other function or global references the + // blockaddress of a block within the current function. And as a QOI issue, + // if someone is using a blockaddress without an indirectbr, and that + // reference somehow ends up in another function or global, we probably + // don't want to inline this function. + if (isa(TI)) + return false; + + if (!HasReturn && isa(TI)) + HasReturn = true; + else + Cost += InlineConstants::InstrCost; + + // Analyze the cost of this block. If we blow through the threshold, this + // returns false, and we can bail on out. + if (!analyzeBlock(BB)) { + if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) + return false; + + // If the caller is a recursive function then we don't want to inline + // functions which allocate a lot of stack space because it would increase + // the caller stack usage dramatically. + if (IsCallerRecursive && + AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) + return false; + + break; + } + + // Add in the live successors by first checking whether we have terminator + // that may be simplified based on the values simplified by this call. + if (BranchInst *BI = dyn_cast(TI)) { + if (BI->isConditional()) { + Value *Cond = BI->getCondition(); + if (ConstantInt *SimpleCond + = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { + BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 
1 : 0)); + continue; + } + } + } else if (SwitchInst *SI = dyn_cast(TI)) { + Value *Cond = SI->getCondition(); + if (ConstantInt *SimpleCond + = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { + BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); + continue; + } + } + + // If we're unable to select a particular successor, just count all of + // them. + for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; + ++TIdx) + BBWorklist.insert(TI->getSuccessor(TIdx)); + + // If we had any successors at this point, than post-inlining is likely to + // have them as well. Note that we assume any basic blocks which existed + // due to branches or switches which folded above will also fold after + // inlining. + if (SingleBB && TI->getNumSuccessors() > 1) { + // Take off the bonus we applied to the threshold. + Threshold -= SingleBBBonus; + SingleBB = false; + } + } + + // If this is a noduplicate call, we can still inline as long as + // inlining this would cause the removal of the caller (so the instruction + // is not actually duplicated, just moved). + if (!OnlyOneCallAndLocalLinkage && ContainsNoDuplicateCall) + return false; + + Threshold += VectorBonus; + + return Cost < Threshold; +} + +#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +/// \brief Dump stats about this call's analysis. +void CallAnalyzer::dump() { +#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" + DEBUG_PRINT_STAT(NumConstantArgs); + DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); + DEBUG_PRINT_STAT(NumAllocaArgs); + DEBUG_PRINT_STAT(NumConstantPtrCmps); + DEBUG_PRINT_STAT(NumConstantPtrDiffs); + DEBUG_PRINT_STAT(NumInstructionsSimplified); + DEBUG_PRINT_STAT(SROACostSavings); + DEBUG_PRINT_STAT(SROACostSavingsLost); + DEBUG_PRINT_STAT(ContainsNoDuplicateCall); +#undef DEBUG_PRINT_STAT +} +#endif + +INITIALIZE_PASS_BEGIN(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) +INITIALIZE_AG_DEPENDENCY(TargetTransformInfo) +INITIALIZE_PASS_END(InlineCostAnalysis, "inline-cost", "Inline Cost Analysis", + true, true) + +char InlineCostAnalysis::ID = 0; + +InlineCostAnalysis::InlineCostAnalysis() : CallGraphSCCPass(ID), TD(0) {} + +InlineCostAnalysis::~InlineCostAnalysis() {} + +void InlineCostAnalysis::getAnalysisUsage(AnalysisUsage &AU) const { + AU.setPreservesAll(); + AU.addRequired(); + CallGraphSCCPass::getAnalysisUsage(AU); +} + +bool InlineCostAnalysis::runOnSCC(CallGraphSCC &SCC) { + TD = getAnalysisIfAvailable(); + TTI = &getAnalysis(); + return false; +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, int Threshold) { + return getInlineCost(CS, CS.getCalledFunction(), Threshold); +} + +InlineCost InlineCostAnalysis::getInlineCost(CallSite CS, Function *Callee, + int Threshold) { + // Cannot inline indirect calls. + if (!Callee) + return llvm::InlineCost::getNever(); + + // Calls to functions with always-inline attributes should be inlined + // whenever possible. + if (Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::AlwaysInline)) { + if (isInlineViable(*Callee)) + return llvm::InlineCost::getAlways(); + return llvm::InlineCost::getNever(); + } + + // Don't inline functions which can be redefined at link-time to mean + // something else. Don't inline functions marked noinline or call sites + // marked noinline. 
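
After analyzeCall returns, the getInlineCost wrapper below folds three pieces of information into the final answer: the boolean verdict, the computed cost, and the bonus-adjusted threshold. An analysis that said no despite a below-threshold cost was forcibly aborted and becomes "never"; one that said yes despite an at-or-above-threshold cost was forcibly accepted and becomes "always"; everything else is reported as a plain cost/threshold pair. A minimal sketch of that mapping, with a hypothetical enum in place of llvm::InlineCost:

#include <cstdio>

enum class Verdict { Always, Never, Weighted };

struct Decision {
  Verdict Kind;
  int Cost;       // only meaningful for Weighted
  int Threshold;  // only meaningful for Weighted
};

// Mirror the post-analysis checks in getInlineCost: a forced abort with a
// below-threshold cost means never inline; a forced accept with an
// at-or-above-threshold cost means always inline; otherwise report the
// weighed cost against the threshold.
Decision classify(bool ShouldInline, int Cost, int Threshold) {
  if (!ShouldInline && Cost < Threshold)
    return {Verdict::Never, 0, 0};
  if (ShouldInline && Cost >= Threshold)
    return {Verdict::Always, 0, 0};
  return {Verdict::Weighted, Cost, Threshold};
}

int main() {
  Decision D = classify(true, 120, 300);
  std::printf("kind=%d cost=%d threshold=%d\n", (int)D.Kind, D.Cost,
              D.Threshold);
}
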
+ if (Callee->mayBeOverridden() || + Callee->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::NoInline) || + CS.isNoInline()) + return llvm::InlineCost::getNever(); + + DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() + << "...\n"); + + CallAnalyzer CA(TD, *TTI, *Callee, Threshold); + bool ShouldInline = CA.analyzeCall(CS); + + DEBUG(CA.dump()); + + // Check if there was a reason to force inlining or no inlining. + if (!ShouldInline && CA.getCost() < CA.getThreshold()) + return InlineCost::getNever(); + if (ShouldInline && CA.getCost() >= CA.getThreshold()) + return InlineCost::getAlways(); + + return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); +} + +bool InlineCostAnalysis::isInlineViable(Function &F) { + bool ReturnsTwice = + F.getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::ReturnsTwice); + for (Function::iterator BI = F.begin(), BE = F.end(); BI != BE; ++BI) { + // Disallow inlining of functions which contain an indirect branch. + if (isa(BI->getTerminator())) + return false; + + for (BasicBlock::iterator II = BI->begin(), IE = BI->end(); II != IE; + ++II) { + CallSite CS(II); + if (!CS) + continue; + + // Disallow recursive calls. + if (&F == CS.getCalledFunction()) + return false; + + // Disallow calls which expose returns-twice to a function not previously + // attributed as such. + if (!ReturnsTwice && CS.isCall() && + cast(CS.getInstruction())->canReturnTwice()) + return false; + } + } + + return true; +} diff --git a/lib/Analysis/IVUsers.cpp b/lib/Analysis/IVUsers.cpp index d4221b89e0f6..b33e2cb9999e 100644 --- a/lib/Analysis/IVUsers.cpp +++ b/lib/Analysis/IVUsers.cpp @@ -14,17 +14,17 @@ #define DEBUG_TYPE "iv-users" #include "llvm/Analysis/IVUsers.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/Type.h" -#include "llvm/DerivedTypes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopPass.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DataLayout.h" #include "llvm/Assembly/Writer.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp deleted file mode 100644 index 5f51f775f142..000000000000 --- a/lib/Analysis/InlineCost.cpp +++ /dev/null @@ -1,1067 +0,0 @@ -//===- InlineCost.cpp - Cost analysis for inliner -------------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements inline cost analysis. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "inline-cost" -#include "llvm/Analysis/InlineCost.h" -#include "llvm/Analysis/ConstantFolding.h" -#include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Support/CallSite.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/CallingConv.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" -#include "llvm/GlobalAlias.h" -#include "llvm/DataLayout.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" - -using namespace llvm; - -STATISTIC(NumCallsAnalyzed, "Number of call sites analyzed"); - -namespace { - -class CallAnalyzer : public InstVisitor { - typedef InstVisitor Base; - friend class InstVisitor; - - // DataLayout if available, or null. - const DataLayout *const TD; - - // The called function. - Function &F; - - int Threshold; - int Cost; - const bool AlwaysInline; - - bool IsCallerRecursive; - bool IsRecursiveCall; - bool ExposesReturnsTwice; - bool HasDynamicAlloca; - /// Number of bytes allocated statically by the callee. - uint64_t AllocatedSize; - unsigned NumInstructions, NumVectorInstructions; - int FiftyPercentVectorBonus, TenPercentVectorBonus; - int VectorBonus; - - // While we walk the potentially-inlined instructions, we build up and - // maintain a mapping of simplified values specific to this callsite. The - // idea is to propagate any special information we have about arguments to - // this call through the inlinable section of the function, and account for - // likely simplifications post-inlining. The most important aspect we track - // is CFG altering simplifications -- when we prove a basic block dead, that - // can cause dramatic shifts in the cost of inlining a function. - DenseMap SimplifiedValues; - - // Keep track of the values which map back (through function arguments) to - // allocas on the caller stack which could be simplified through SROA. - DenseMap SROAArgValues; - - // The mapping of caller Alloca values to their accumulated cost savings. If - // we have to disable SROA for one of the allocas, this tells us how much - // cost must be added. - DenseMap SROAArgCosts; - - // Keep track of values which map to a pointer base and constant offset. - DenseMap > ConstantOffsetPtrs; - - // Custom simplification helper routines. - bool isAllocaDerivedArg(Value *V); - bool lookupSROAArgAndCost(Value *V, Value *&Arg, - DenseMap::iterator &CostIt); - void disableSROA(DenseMap::iterator CostIt); - void disableSROA(Value *V); - void accumulateSROACost(DenseMap::iterator CostIt, - int InstructionCost); - bool handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost); - bool isGEPOffsetConstant(GetElementPtrInst &GEP); - bool accumulateGEPOffset(GEPOperator &GEP, APInt &Offset); - ConstantInt *stripAndComputeInBoundsConstantOffsets(Value *&V); - - // Custom analysis routines. - bool analyzeBlock(BasicBlock *BB); - - // Disable several entry points to the visitor so we don't accidentally use - // them by declaring but not defining them here. - void visit(Module *); void visit(Module &); - void visit(Function *); void visit(Function &); - void visit(BasicBlock *); void visit(BasicBlock &); - - // Provide base case for our instruction visit. 
- bool visitInstruction(Instruction &I); - - // Our visit overrides. - bool visitAlloca(AllocaInst &I); - bool visitPHI(PHINode &I); - bool visitGetElementPtr(GetElementPtrInst &I); - bool visitBitCast(BitCastInst &I); - bool visitPtrToInt(PtrToIntInst &I); - bool visitIntToPtr(IntToPtrInst &I); - bool visitCastInst(CastInst &I); - bool visitUnaryInstruction(UnaryInstruction &I); - bool visitICmp(ICmpInst &I); - bool visitSub(BinaryOperator &I); - bool visitBinaryOperator(BinaryOperator &I); - bool visitLoad(LoadInst &I); - bool visitStore(StoreInst &I); - bool visitCallSite(CallSite CS); - -public: - CallAnalyzer(const DataLayout *TD, Function &Callee, int Threshold) - : TD(TD), F(Callee), Threshold(Threshold), Cost(0), - AlwaysInline(F.getFnAttributes().hasAttribute(Attributes::AlwaysInline)), - IsCallerRecursive(false), IsRecursiveCall(false), - ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0), - NumInstructions(0), NumVectorInstructions(0), - FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0), - NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0), - NumConstantPtrCmps(0), NumConstantPtrDiffs(0), - NumInstructionsSimplified(0), SROACostSavings(0), SROACostSavingsLost(0) { - } - - bool analyzeCall(CallSite CS); - - int getThreshold() { return Threshold; } - int getCost() { return Cost; } - bool isAlwaysInline() { return AlwaysInline; } - - // Keep a bunch of stats about the cost savings found so we can print them - // out when debugging. - unsigned NumConstantArgs; - unsigned NumConstantOffsetPtrArgs; - unsigned NumAllocaArgs; - unsigned NumConstantPtrCmps; - unsigned NumConstantPtrDiffs; - unsigned NumInstructionsSimplified; - unsigned SROACostSavings; - unsigned SROACostSavingsLost; - - void dump(); -}; - -} // namespace - -/// \brief Test whether the given value is an Alloca-derived function argument. -bool CallAnalyzer::isAllocaDerivedArg(Value *V) { - return SROAArgValues.count(V); -} - -/// \brief Lookup the SROA-candidate argument and cost iterator which V maps to. -/// Returns false if V does not map to a SROA-candidate. -bool CallAnalyzer::lookupSROAArgAndCost( - Value *V, Value *&Arg, DenseMap::iterator &CostIt) { - if (SROAArgValues.empty() || SROAArgCosts.empty()) - return false; - - DenseMap::iterator ArgIt = SROAArgValues.find(V); - if (ArgIt == SROAArgValues.end()) - return false; - - Arg = ArgIt->second; - CostIt = SROAArgCosts.find(Arg); - return CostIt != SROAArgCosts.end(); -} - -/// \brief Disable SROA for the candidate marked by this cost iterator. -/// -/// This marks the candidate as no longer viable for SROA, and adds the cost -/// savings associated with it back into the inline cost measurement. -void CallAnalyzer::disableSROA(DenseMap::iterator CostIt) { - // If we're no longer able to perform SROA we need to undo its cost savings - // and prevent subsequent analysis. - Cost += CostIt->second; - SROACostSavings -= CostIt->second; - SROACostSavingsLost += CostIt->second; - SROAArgCosts.erase(CostIt); -} - -/// \brief If 'V' maps to a SROA candidate, disable SROA for it. -void CallAnalyzer::disableSROA(Value *V) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(V, SROAArg, CostIt)) - disableSROA(CostIt); -} - -/// \brief Accumulate the given cost for a particular SROA candidate. 
-void CallAnalyzer::accumulateSROACost(DenseMap::iterator CostIt, - int InstructionCost) { - CostIt->second += InstructionCost; - SROACostSavings += InstructionCost; -} - -/// \brief Helper for the common pattern of handling a SROA candidate. -/// Either accumulates the cost savings if the SROA remains valid, or disables -/// SROA for the candidate. -bool CallAnalyzer::handleSROACandidate(bool IsSROAValid, - DenseMap::iterator CostIt, - int InstructionCost) { - if (IsSROAValid) { - accumulateSROACost(CostIt, InstructionCost); - return true; - } - - disableSROA(CostIt); - return false; -} - -/// \brief Check whether a GEP's indices are all constant. -/// -/// Respects any simplified values known during the analysis of this callsite. -bool CallAnalyzer::isGEPOffsetConstant(GetElementPtrInst &GEP) { - for (User::op_iterator I = GEP.idx_begin(), E = GEP.idx_end(); I != E; ++I) - if (!isa(*I) && !SimplifiedValues.lookup(*I)) - return false; - - return true; -} - -/// \brief Accumulate a constant GEP offset into an APInt if possible. -/// -/// Returns false if unable to compute the offset for any reason. Respects any -/// simplified values known during the analysis of this callsite. -bool CallAnalyzer::accumulateGEPOffset(GEPOperator &GEP, APInt &Offset) { - if (!TD) - return false; - - unsigned IntPtrWidth = TD->getPointerSizeInBits(); - assert(IntPtrWidth == Offset.getBitWidth()); - - for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); - GTI != GTE; ++GTI) { - ConstantInt *OpC = dyn_cast(GTI.getOperand()); - if (!OpC) - if (Constant *SimpleOp = SimplifiedValues.lookup(GTI.getOperand())) - OpC = dyn_cast(SimpleOp); - if (!OpC) - return false; - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD->getStructLayout(STy); - Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); - continue; - } - - APInt TypeSize(IntPtrWidth, TD->getTypeAllocSize(GTI.getIndexedType())); - Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; - } - return true; -} - -bool CallAnalyzer::visitAlloca(AllocaInst &I) { - // FIXME: Check whether inlining will turn a dynamic alloca into a static - // alloca, and handle that case. - - // Accumulate the allocated size. - if (I.isStaticAlloca()) { - Type *Ty = I.getAllocatedType(); - AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) : - Ty->getPrimitiveSizeInBits()); - } - - // We will happily inline static alloca instructions or dynamic alloca - // instructions in always-inline situations. - if (AlwaysInline || I.isStaticAlloca()) - return Base::visitAlloca(I); - - // FIXME: This is overly conservative. Dynamic allocas are inefficient for - // a variety of reasons, and so we would like to not inline them into - // functions which don't currently have a dynamic alloca. This simply - // disables inlining altogether in the presence of a dynamic alloca. - HasDynamicAlloca = true; - return false; -} - -bool CallAnalyzer::visitPHI(PHINode &I) { - // FIXME: We should potentially be tracking values through phi nodes, - // especially when they collapse to a single value due to deleted CFG edges - // during inlining. - - // FIXME: We need to propagate SROA *disabling* through phi nodes, even - // though we don't want to propagate it's bonuses. The idea is to disable - // SROA if it *might* be used in an inappropriate manner. - - // Phi nodes are always zero-cost. 
- return true; -} - -bool CallAnalyzer::visitGetElementPtr(GetElementPtrInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - bool SROACandidate = lookupSROAArgAndCost(I.getPointerOperand(), - SROAArg, CostIt); - - // Try to fold GEPs of constant-offset call site argument pointers. This - // requires target data and inbounds GEPs. - if (TD && I.isInBounds()) { - // Check if we have a base + offset for the pointer. - Value *Ptr = I.getPointerOperand(); - std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Ptr); - if (BaseAndOffset.first) { - // Check if the offset of this GEP is constant, and if so accumulate it - // into Offset. - if (!accumulateGEPOffset(cast(I), BaseAndOffset.second)) { - // Non-constant GEPs aren't folded, and disable SROA. - if (SROACandidate) - disableSROA(CostIt); - return false; - } - - // Add the result as a new mapping to Base + Offset. - ConstantOffsetPtrs[&I] = BaseAndOffset; - - // Also handle SROA candidates here, we already know that the GEP is - // all-constant indexed. - if (SROACandidate) - SROAArgValues[&I] = SROAArg; - - return true; - } - } - - if (isGEPOffsetConstant(I)) { - if (SROACandidate) - SROAArgValues[&I] = SROAArg; - - // Constant GEPs are modeled as free. - return true; - } - - // Variable GEPs will require math and will disable SROA. - if (SROACandidate) - disableSROA(CostIt); - return false; -} - -bool CallAnalyzer::visitBitCast(BitCastInst &I) { - // Propagate constants through bitcasts. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getBitCast(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offsets through casts - std::pair BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); - // Casts don't change the offset, just wrap it up. - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - - // Also look for SROA candidates here. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - // Bitcasts are always zero cost. - return true; -} - -bool CallAnalyzer::visitPtrToInt(PtrToIntInst &I) { - // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getPtrToInt(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offset pairs when converted to a plain integer provided the - // integer is large enough to represent the pointer. - unsigned IntegerSize = I.getType()->getScalarSizeInBits(); - if (TD && IntegerSize >= TD->getPointerSizeInBits()) { - std::pair BaseAndOffset - = ConstantOffsetPtrs.lookup(I.getOperand(0)); - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - } - - // This is really weird. Technically, ptrtoint will disable SROA. However, - // unless that ptrtoint is *used* somewhere in the live basic blocks after - // inlining, it will be nuked, and SROA should proceed. All of the uses which - // would block SROA would also block SROA if applied directly to a pointer, - // and so we can just add the integer in here. The only places where SROA is - // preserved either cannot fire on an integer, or won't in-and-of themselves - // disable SROA (ext) w/o some later use that we would see and disable. 
- Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - return isInstructionFree(&I, TD); -} - -bool CallAnalyzer::visitIntToPtr(IntToPtrInst &I) { - // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getIntToPtr(COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Track base/offset pairs when round-tripped through a pointer without - // modifications provided the integer is not too large. - Value *Op = I.getOperand(0); - unsigned IntegerSize = Op->getType()->getScalarSizeInBits(); - if (TD && IntegerSize <= TD->getPointerSizeInBits()) { - std::pair BaseAndOffset = ConstantOffsetPtrs.lookup(Op); - if (BaseAndOffset.first) - ConstantOffsetPtrs[&I] = BaseAndOffset; - } - - // "Propagate" SROA here in the same manner as we do for ptrtoint above. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(Op, SROAArg, CostIt)) - SROAArgValues[&I] = SROAArg; - - return isInstructionFree(&I, TD); -} - -bool CallAnalyzer::visitCastInst(CastInst &I) { - // Propagate constants through ptrtoint. - if (Constant *COp = dyn_cast(I.getOperand(0))) - if (Constant *C = ConstantExpr::getCast(I.getOpcode(), COp, I.getType())) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable SROA in the face of arbitrary casts we don't whitelist elsewhere. - disableSROA(I.getOperand(0)); - - return isInstructionFree(&I, TD); -} - -bool CallAnalyzer::visitUnaryInstruction(UnaryInstruction &I) { - Value *Operand = I.getOperand(0); - Constant *Ops[1] = { dyn_cast(Operand) }; - if (Ops[0] || (Ops[0] = SimplifiedValues.lookup(Operand))) - if (Constant *C = ConstantFoldInstOperands(I.getOpcode(), I.getType(), - Ops, TD)) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable any SROA on the argument to arbitrary unary operators. - disableSROA(Operand); - - return false; -} - -bool CallAnalyzer::visitICmp(ICmpInst &I) { - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - // First try to handle simplified comparisons. - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - if (Constant *CLHS = dyn_cast(LHS)) - if (Constant *CRHS = dyn_cast(RHS)) - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - return true; - } - - // Otherwise look for a comparison between constant offset pointers with - // a common base. - Value *LHSBase, *RHSBase; - APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); - if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); - if (RHSBase && LHSBase == RHSBase) { - // We have common bases, fold the icmp to a constant based on the - // offsets. - Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); - Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); - if (Constant *C = ConstantExpr::getICmp(I.getPredicate(), CLHS, CRHS)) { - SimplifiedValues[&I] = C; - ++NumConstantPtrCmps; - return true; - } - } - } - - // If the comparison is an equality comparison with null, we can simplify it - // for any alloca-derived argument. - if (I.isEquality() && isa(I.getOperand(1))) - if (isAllocaDerivedArg(I.getOperand(0))) { - // We can actually predict the result of comparisons between an - // alloca-derived value and null. 
Note that this fires regardless of - // SROA firing. - bool IsNotEqual = I.getPredicate() == CmpInst::ICMP_NE; - SimplifiedValues[&I] = IsNotEqual ? ConstantInt::getTrue(I.getType()) - : ConstantInt::getFalse(I.getType()); - return true; - } - - // Finally check for SROA candidates in comparisons. - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (isa(I.getOperand(1))) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitSub(BinaryOperator &I) { - // Try to handle a special case: we can fold computing the difference of two - // constant-related pointers. - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - Value *LHSBase, *RHSBase; - APInt LHSOffset, RHSOffset; - llvm::tie(LHSBase, LHSOffset) = ConstantOffsetPtrs.lookup(LHS); - if (LHSBase) { - llvm::tie(RHSBase, RHSOffset) = ConstantOffsetPtrs.lookup(RHS); - if (RHSBase && LHSBase == RHSBase) { - // We have common bases, fold the subtract to a constant based on the - // offsets. - Constant *CLHS = ConstantInt::get(LHS->getContext(), LHSOffset); - Constant *CRHS = ConstantInt::get(RHS->getContext(), RHSOffset); - if (Constant *C = ConstantExpr::getSub(CLHS, CRHS)) { - SimplifiedValues[&I] = C; - ++NumConstantPtrDiffs; - return true; - } - } - } - - // Otherwise, fall back to the generic logic for simplifying and handling - // instructions. - return Base::visitSub(I); -} - -bool CallAnalyzer::visitBinaryOperator(BinaryOperator &I) { - Value *LHS = I.getOperand(0), *RHS = I.getOperand(1); - if (!isa(LHS)) - if (Constant *SimpleLHS = SimplifiedValues.lookup(LHS)) - LHS = SimpleLHS; - if (!isa(RHS)) - if (Constant *SimpleRHS = SimplifiedValues.lookup(RHS)) - RHS = SimpleRHS; - Value *SimpleV = SimplifyBinOp(I.getOpcode(), LHS, RHS, TD); - if (Constant *C = dyn_cast_or_null(SimpleV)) { - SimplifiedValues[&I] = C; - return true; - } - - // Disable any SROA on arguments to arbitrary, unsimplified binary operators. - disableSROA(LHS); - disableSROA(RHS); - - return false; -} - -bool CallAnalyzer::visitLoad(LoadInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitStore(StoreInst &I) { - Value *SROAArg; - DenseMap::iterator CostIt; - if (lookupSROAArgAndCost(I.getOperand(0), SROAArg, CostIt)) { - if (I.isSimple()) { - accumulateSROACost(CostIt, InlineConstants::InstrCost); - return true; - } - - disableSROA(CostIt); - } - - return false; -} - -bool CallAnalyzer::visitCallSite(CallSite CS) { - if (CS.isCall() && cast(CS.getInstruction())->canReturnTwice() && - !F.getFnAttributes().hasAttribute(Attributes::ReturnsTwice)) { - // This aborts the entire analysis. - ExposesReturnsTwice = true; - return false; - } - - if (IntrinsicInst *II = dyn_cast(CS.getInstruction())) { - switch (II->getIntrinsicID()) { - default: - return Base::visitCallSite(CS); - - case Intrinsic::memset: - case Intrinsic::memcpy: - case Intrinsic::memmove: - // SROA can usually chew through these intrinsics, but they aren't free. - return false; - } - } - - if (Function *F = CS.getCalledFunction()) { - if (F == CS.getInstruction()->getParent()->getParent()) { - // This flag will fully abort the analysis, so don't bother with anything - // else. 
- IsRecursiveCall = true; - return false; - } - - if (!callIsSmall(CS)) { - // We account for the average 1 instruction per call argument setup - // here. - Cost += CS.arg_size() * InlineConstants::InstrCost; - - // Everything other than inline ASM will also have a significant cost - // merely from making the call. - if (!isa(CS.getCalledValue())) - Cost += InlineConstants::CallPenalty; - } - - return Base::visitCallSite(CS); - } - - // Otherwise we're in a very special case -- an indirect function call. See - // if we can be particularly clever about this. - Value *Callee = CS.getCalledValue(); - - // First, pay the price of the argument setup. We account for the average - // 1 instruction per call argument setup here. - Cost += CS.arg_size() * InlineConstants::InstrCost; - - // Next, check if this happens to be an indirect function call to a known - // function in this inline context. If not, we've done all we can. - Function *F = dyn_cast_or_null(SimplifiedValues.lookup(Callee)); - if (!F) - return Base::visitCallSite(CS); - - // If we have a constant that we are calling as a function, we can peer - // through it and see the function target. This happens not infrequently - // during devirtualization and so we want to give it a hefty bonus for - // inlining, but cap that bonus in the event that inlining wouldn't pan - // out. Pretend to inline the function, with a custom threshold. - CallAnalyzer CA(TD, *F, InlineConstants::IndirectCallThreshold); - if (CA.analyzeCall(CS)) { - // We were able to inline the indirect call! Subtract the cost from the - // bonus we want to apply, but don't go below zero. - Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); - } - - return Base::visitCallSite(CS); -} - -bool CallAnalyzer::visitInstruction(Instruction &I) { - // Some instructions are free. All of the free intrinsics can also be - // handled by SROA, etc. - if (isInstructionFree(&I, TD)) - return true; - - // We found something we don't understand or can't handle. Mark any SROA-able - // values in the operand list as no longer viable. - for (User::op_iterator OI = I.op_begin(), OE = I.op_end(); OI != OE; ++OI) - disableSROA(*OI); - - return false; -} - - -/// \brief Analyze a basic block for its contribution to the inline cost. -/// -/// This method walks the analyzer over every instruction in the given basic -/// block and accounts for their cost during inlining at this callsite. It -/// aborts early if the threshold has been exceeded or an impossible to inline -/// construct has been detected. It returns false if inlining is no longer -/// viable, and true if inlining remains viable. -bool CallAnalyzer::analyzeBlock(BasicBlock *BB) { - for (BasicBlock::iterator I = BB->begin(), E = llvm::prior(BB->end()); - I != E; ++I) { - ++NumInstructions; - if (isa(I) || I->getType()->isVectorTy()) - ++NumVectorInstructions; - - // If the instruction simplified to a constant, there is no cost to this - // instruction. Visit the instructions using our InstVisitor to account for - // all of the per-instruction logic. The visit tree returns true if we - // consumed the instruction in any way, and false if the instruction's base - // cost should count against inlining. - if (Base::visit(I)) - ++NumInstructionsSimplified; - else - Cost += InlineConstants::InstrCost; - - // If the visit this instruction detected an uninlinable pattern, abort. 
- if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) - return false; - - // If the caller is a recursive function then we don't want to inline - // functions which allocate a lot of stack space because it would increase - // the caller stack usage dramatically. - if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) - return false; - - if (NumVectorInstructions > NumInstructions/2) - VectorBonus = FiftyPercentVectorBonus; - else if (NumVectorInstructions > NumInstructions/10) - VectorBonus = TenPercentVectorBonus; - else - VectorBonus = 0; - - // Check if we've past the threshold so we don't spin in huge basic - // blocks that will never inline. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) - return false; - } - - return true; -} - -/// \brief Compute the base pointer and cumulative constant offsets for V. -/// -/// This strips all constant offsets off of V, leaving it the base pointer, and -/// accumulates the total constant offset applied in the returned constant. It -/// returns 0 if V is not a pointer, and returns the constant '0' if there are -/// no constant offsets applied. -ConstantInt *CallAnalyzer::stripAndComputeInBoundsConstantOffsets(Value *&V) { - if (!TD || !V->getType()->isPointerTy()) - return 0; - - unsigned IntPtrWidth = TD->getPointerSizeInBits(); - APInt Offset = APInt::getNullValue(IntPtrWidth); - - // Even though we don't look through PHI nodes, we could be called on an - // instruction in an unreachable block, which may be on a cycle. - SmallPtrSet Visited; - Visited.insert(V); - do { - if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->isInBounds() || !accumulateGEPOffset(*GEP, Offset)) - return 0; - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast(V)) { - if (GA->mayBeOverridden()) - break; - V = GA->getAliasee(); - } else { - break; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } while (Visited.insert(V)); - - Type *IntPtrTy = TD->getIntPtrType(V->getContext()); - return cast(ConstantInt::get(IntPtrTy, Offset)); -} - -/// \brief Analyze a call site for potential inlining. -/// -/// Returns true if inlining this call is viable, and false if it is not -/// viable. It computes the cost and adjusts the threshold based on numerous -/// factors and heuristics. If this method returns false but the computed cost -/// is below the computed threshold, then inlining was forcibly disabled by -/// some artifact of the rountine. -bool CallAnalyzer::analyzeCall(CallSite CS) { - ++NumCallsAnalyzed; - - // Track whether the post-inlining function would have more than one basic - // block. A single basic block is often intended for inlining. Balloon the - // threshold by 50% until we pass the single-BB phase. - bool SingleBB = true; - int SingleBBBonus = Threshold / 2; - Threshold += SingleBBBonus; - - // Unless we are always-inlining, perform some tweaks to the cost and - // threshold based on the direct callsite information. - if (!AlwaysInline) { - // We want to more aggressively inline vector-dense kernels, so up the - // threshold, and we'll lower it if the % of vector instructions gets too - // low. - assert(NumInstructions == 0); - assert(NumVectorInstructions == 0); - FiftyPercentVectorBonus = Threshold; - TenPercentVectorBonus = Threshold / 2; - - // Give out bonuses per argument, as the instructions setting them up will - // be gone after inlining. 
- for (unsigned I = 0, E = CS.arg_size(); I != E; ++I) { - if (TD && CS.isByValArgument(I)) { - // We approximate the number of loads and stores needed by dividing the - // size of the byval type by the target's pointer size. - PointerType *PTy = cast(CS.getArgument(I)->getType()); - unsigned TypeSize = TD->getTypeSizeInBits(PTy->getElementType()); - unsigned PointerSize = TD->getPointerSizeInBits(); - // Ceiling division. - unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; - - // If it generates more than 8 stores it is likely to be expanded as an - // inline memcpy so we take that as an upper bound. Otherwise we assume - // one load and one store per word copied. - // FIXME: The maxStoresPerMemcpy setting from the target should be used - // here instead of a magic number of 8, but it's not available via - // DataLayout. - NumStores = std::min(NumStores, 8U); - - Cost -= 2 * NumStores * InlineConstants::InstrCost; - } else { - // For non-byval arguments subtract off one instruction per call - // argument. - Cost -= InlineConstants::InstrCost; - } - } - - // If there is only one call of the function, and it has internal linkage, - // the cost of inlining it drops dramatically. - if (F.hasLocalLinkage() && F.hasOneUse() && &F == CS.getCalledFunction()) - Cost += InlineConstants::LastCallToStaticBonus; - - // If the instruction after the call, or if the normal destination of the - // invoke is an unreachable instruction, the function is noreturn. As such, - // there is little point in inlining this unless there is literally zero - // cost. - Instruction *Instr = CS.getInstruction(); - if (InvokeInst *II = dyn_cast(Instr)) { - if (isa(II->getNormalDest()->begin())) - Threshold = 1; - } else if (isa(++BasicBlock::iterator(Instr))) - Threshold = 1; - - // If this function uses the coldcc calling convention, prefer not to inline - // it. - if (F.getCallingConv() == CallingConv::Cold) - Cost += InlineConstants::ColdccPenalty; - - // Check if we're done. This can happen due to bonuses and penalties. - if (Cost > Threshold) - return false; - } - - if (F.empty()) - return true; - - Function *Caller = CS.getInstruction()->getParent()->getParent(); - // Check if the caller function is recursive itself. - for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end(); - U != E; ++U) { - CallSite Site(cast(*U)); - if (!Site) - continue; - Instruction *I = Site.getInstruction(); - if (I->getParent()->getParent() == Caller) { - IsCallerRecursive = true; - break; - } - } - - // Track whether we've seen a return instruction. The first return - // instruction is free, as at least one will usually disappear in inlining. - bool HasReturn = false; - - // Populate our simplified values by mapping from function arguments to call - // arguments with known important simplifications. - CallSite::arg_iterator CAI = CS.arg_begin(); - for (Function::arg_iterator FAI = F.arg_begin(), FAE = F.arg_end(); - FAI != FAE; ++FAI, ++CAI) { - assert(CAI != CS.arg_end()); - if (Constant *C = dyn_cast(CAI)) - SimplifiedValues[FAI] = C; - - Value *PtrArg = *CAI; - if (ConstantInt *C = stripAndComputeInBoundsConstantOffsets(PtrArg)) { - ConstantOffsetPtrs[FAI] = std::make_pair(PtrArg, C->getValue()); - - // We can SROA any pointer arguments derived from alloca instructions. 
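// A worked example of the byval bonus computed earlier in analyzeCall (the
// numbers below are illustrative assumptions, not values taken from this
// patch): for a 100-byte byval struct on a target with 64-bit pointers,
// TypeSize = 800 and PointerSize = 64, so the ceiling division yields
// (800 + 63) / 64 = 13 word copies, the cap reduces that to 8, and the call
// site is credited 2 * 8 * InlineConstants::InstrCost, mirroring the code
// above:
//
//   unsigned TypeSize = 800, PointerSize = 64;
//   unsigned NumStores = (TypeSize + PointerSize - 1) / PointerSize; // 13
//   NumStores = std::min(NumStores, 8U);                             // capped at 8
//   Cost -= 2 * NumStores * InlineConstants::InstrCost;              // credit of 16 instructions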
- if (isa(PtrArg)) { - SROAArgValues[FAI] = PtrArg; - SROAArgCosts[PtrArg] = 0; - } - } - } - NumConstantArgs = SimplifiedValues.size(); - NumConstantOffsetPtrArgs = ConstantOffsetPtrs.size(); - NumAllocaArgs = SROAArgValues.size(); - - // The worklist of live basic blocks in the callee *after* inlining. We avoid - // adding basic blocks of the callee which can be proven to be dead for this - // particular call site in order to get more accurate cost estimates. This - // requires a somewhat heavyweight iteration pattern: we need to walk the - // basic blocks in a breadth-first order as we insert live successors. To - // accomplish this, prioritizing for small iterations because we exit after - // crossing our threshold, we use a small-size optimized SetVector. - typedef SetVector, - SmallPtrSet > BBSetVector; - BBSetVector BBWorklist; - BBWorklist.insert(&F.getEntryBlock()); - // Note that we *must not* cache the size, this loop grows the worklist. - for (unsigned Idx = 0; Idx != BBWorklist.size(); ++Idx) { - // Bail out the moment we cross the threshold. This means we'll under-count - // the cost, but only when undercounting doesn't matter. - if (!AlwaysInline && Cost > (Threshold + VectorBonus)) - break; - - BasicBlock *BB = BBWorklist[Idx]; - if (BB->empty()) - continue; - - // Handle the terminator cost here where we can track returns and other - // function-wide constructs. - TerminatorInst *TI = BB->getTerminator(); - - // We never want to inline functions that contain an indirectbr. This is - // incorrect because all the blockaddress's (in static global initializers - // for example) would be referring to the original function, and this - // indirect jump would jump from the inlined copy of the function into the - // original function which is extremely undefined behavior. - // FIXME: This logic isn't really right; we can safely inline functions - // with indirectbr's as long as no other function or global references the - // blockaddress of a block within the current function. And as a QOI issue, - // if someone is using a blockaddress without an indirectbr, and that - // reference somehow ends up in another function or global, we probably - // don't want to inline this function. - if (isa(TI)) - return false; - - if (!HasReturn && isa(TI)) - HasReturn = true; - else - Cost += InlineConstants::InstrCost; - - // Analyze the cost of this block. If we blow through the threshold, this - // returns false, and we can bail on out. - if (!analyzeBlock(BB)) { - if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca) - return false; - - // If the caller is a recursive function then we don't want to inline - // functions which allocate a lot of stack space because it would increase - // the caller stack usage dramatically. - if (IsCallerRecursive && - AllocatedSize > InlineConstants::TotalAllocaSizeRecursiveCaller) - return false; - - break; - } - - // Add in the live successors by first checking whether we have terminator - // that may be simplified based on the values simplified by this call. - if (BranchInst *BI = dyn_cast(TI)) { - if (BI->isConditional()) { - Value *Cond = BI->getCondition(); - if (ConstantInt *SimpleCond - = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(BI->getSuccessor(SimpleCond->isZero() ? 
1 : 0)); - continue; - } - } - } else if (SwitchInst *SI = dyn_cast(TI)) { - Value *Cond = SI->getCondition(); - if (ConstantInt *SimpleCond - = dyn_cast_or_null(SimplifiedValues.lookup(Cond))) { - BBWorklist.insert(SI->findCaseValue(SimpleCond).getCaseSuccessor()); - continue; - } - } - - // If we're unable to select a particular successor, just count all of - // them. - for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; - ++TIdx) - BBWorklist.insert(TI->getSuccessor(TIdx)); - - // If we had any successors at this point, than post-inlining is likely to - // have them as well. Note that we assume any basic blocks which existed - // due to branches or switches which folded above will also fold after - // inlining. - if (SingleBB && TI->getNumSuccessors() > 1) { - // Take off the bonus we applied to the threshold. - Threshold -= SingleBBBonus; - SingleBB = false; - } - } - - Threshold += VectorBonus; - - return AlwaysInline || Cost < Threshold; -} - -#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) -/// \brief Dump stats about this call's analysis. -void CallAnalyzer::dump() { -#define DEBUG_PRINT_STAT(x) llvm::dbgs() << " " #x ": " << x << "\n" - DEBUG_PRINT_STAT(NumConstantArgs); - DEBUG_PRINT_STAT(NumConstantOffsetPtrArgs); - DEBUG_PRINT_STAT(NumAllocaArgs); - DEBUG_PRINT_STAT(NumConstantPtrCmps); - DEBUG_PRINT_STAT(NumConstantPtrDiffs); - DEBUG_PRINT_STAT(NumInstructionsSimplified); - DEBUG_PRINT_STAT(SROACostSavings); - DEBUG_PRINT_STAT(SROACostSavingsLost); -#undef DEBUG_PRINT_STAT -} -#endif - -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, int Threshold) { - return getInlineCost(CS, CS.getCalledFunction(), Threshold); -} - -InlineCost InlineCostAnalyzer::getInlineCost(CallSite CS, Function *Callee, - int Threshold) { - // Don't inline functions which can be redefined at link-time to mean - // something else. Don't inline functions marked noinline or call sites - // marked noinline. - if (!Callee || Callee->mayBeOverridden() || - Callee->getFnAttributes().hasAttribute(Attributes::NoInline) || - CS.isNoInline()) - return llvm::InlineCost::getNever(); - - DEBUG(llvm::dbgs() << " Analyzing call of " << Callee->getName() - << "...\n"); - - CallAnalyzer CA(TD, *Callee, Threshold); - bool ShouldInline = CA.analyzeCall(CS); - - DEBUG(CA.dump()); - - // Check if there was a reason to force inlining or no inlining. 
- if (!ShouldInline && CA.getCost() < CA.getThreshold()) - return InlineCost::getNever(); - if (ShouldInline && (CA.isAlwaysInline() || - CA.getCost() >= CA.getThreshold())) - return InlineCost::getAlways(); - - return llvm::InlineCost::get(CA.getCost(), CA.getThreshold()); -} diff --git a/lib/Analysis/InstCount.cpp b/lib/Analysis/InstCount.cpp index 3b385d26ba3c..75a49eb90a88 100644 --- a/lib/Analysis/InstCount.cpp +++ b/lib/Analysis/InstCount.cpp @@ -13,13 +13,13 @@ #define DEBUG_TYPE "instcount" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" +#include "llvm/InstVisitor.h" #include "llvm/Pass.h" -#include "llvm/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(TotalInsts , "Number of instructions (of all types)"); @@ -30,7 +30,7 @@ STATISTIC(TotalMemInst, "Number of memory instructions"); #define HANDLE_INST(N, OPCODE, CLASS) \ STATISTIC(Num ## OPCODE ## Inst, "Number of " #OPCODE " insts"); -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" namespace { @@ -43,7 +43,7 @@ namespace { #define HANDLE_INST(N, OPCODE, CLASS) \ void visit##OPCODE(CLASS &) { ++Num##OPCODE##Inst; ++TotalInsts; } -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" void visitInstruction(Instruction &I) { errs() << "Instruction Count does not know about " << I; diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index a76e5ad1b8f8..4a3c74e9db35 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -18,20 +18,20 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "instsimplify" -#include "llvm/GlobalAlias.h" -#include "llvm/Operator.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SetVector.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/Analysis/MemoryBuiltins.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/PatternMatch.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/DataLayout.h" using namespace llvm; using namespace llvm::PatternMatch; @@ -657,51 +657,26 @@ Value *llvm::SimplifyAddInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, RecursionLimit); } -/// \brief Accumulate the constant integer offset a GEP represents. -/// -/// Given a getelementptr instruction/constantexpr, accumulate the constant -/// offset from the base pointer into the provided APInt 'Offset'. Returns true -/// if the GEP has all-constant indices. Returns false if any non-constant -/// index is encountered leaving the 'Offset' in an undefined state. The -/// 'Offset' APInt must be the bitwidth of the target's pointer size. 
-static bool accumulateGEPOffset(const DataLayout &TD, GEPOperator *GEP, - APInt &Offset) { - unsigned IntPtrWidth = TD.getPointerSizeInBits(); - assert(IntPtrWidth == Offset.getBitWidth()); - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP->op_begin() + 1, E = GEP->op_end(); I != E; - ++I, ++GTI) { - ConstantInt *OpC = dyn_cast(*I); - if (!OpC) return false; - if (OpC->isZero()) continue; - - // Handle a struct index, which adds its field offset to the pointer. - if (StructType *STy = dyn_cast(*GTI)) { - unsigned ElementIdx = OpC->getZExtValue(); - const StructLayout *SL = TD.getStructLayout(STy); - Offset += APInt(IntPtrWidth, SL->getElementOffset(ElementIdx)); - continue; - } - - APInt TypeSize(IntPtrWidth, TD.getTypeAllocSize(GTI.getIndexedType())); - Offset += OpC->getValue().sextOrTrunc(IntPtrWidth) * TypeSize; - } - return true; -} - /// \brief Compute the base pointer and cumulative constant offsets for V. /// /// This strips all constant offsets off of V, leaving it the base pointer, and /// accumulates the total constant offset applied in the returned constant. It /// returns 0 if V is not a pointer, and returns the constant '0' if there are /// no constant offsets applied. -static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, +/// +/// This is very similar to GetPointerBaseWithConstantOffset except it doesn't +/// follow non-inbounds geps. This allows it to remain usable for icmp ult/etc. +/// folding. +static Constant *stripAndComputeConstantOffsets(const DataLayout *TD, Value *&V) { - if (!V->getType()->isPointerTy()) - return 0; + assert(V->getType()->getScalarType()->isPointerTy()); + + // Without DataLayout, just be conservative for now. Theoretically, more could + // be done in this case. + if (!TD) + return ConstantInt::get(IntegerType::get(V->getContext(), 64), 0); - unsigned IntPtrWidth = TD.getPointerSizeInBits(); + unsigned IntPtrWidth = TD->getPointerSizeInBits(); APInt Offset = APInt::getNullValue(IntPtrWidth); // Even though we don't look through PHI nodes, we could be called on an @@ -710,7 +685,7 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, Visited.insert(V); do { if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->isInBounds() || !accumulateGEPOffset(TD, GEP, Offset)) + if (!GEP->isInBounds() || !GEP->accumulateConstantOffset(*TD, Offset)) break; V = GEP->getPointerOperand(); } else if (Operator::getOpcode(V) == Instruction::BitCast) { @@ -722,23 +697,24 @@ static Constant *stripAndComputeConstantOffsets(const DataLayout &TD, } else { break; } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); + assert(V->getType()->getScalarType()->isPointerTy() && + "Unexpected operand type!"); } while (Visited.insert(V)); - Type *IntPtrTy = TD.getIntPtrType(V->getContext()); - return ConstantInt::get(IntPtrTy, Offset); + Type *IntPtrTy = TD->getIntPtrType(V->getContext()); + Constant *OffsetIntPtr = ConstantInt::get(IntPtrTy, Offset); + if (V->getType()->isVectorTy()) + return ConstantVector::getSplat(V->getType()->getVectorNumElements(), + OffsetIntPtr); + return OffsetIntPtr; } /// \brief Compute the constant difference between two pointer values. /// If the difference is not a constant, returns zero. 
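// As a rough illustration of the fold this enables (the IR below is an
// assumed example, not taken from this patch): given
//   %p = getelementptr inbounds i32* %base, i64 7   ; byte offset 28
//   %q = getelementptr inbounds i32* %base, i64 2   ; byte offset 8
// both operands strip down to %base, so
//   sub (ptrtoint %p to i64), (ptrtoint %q to i64)
// simplifies to the constant 20.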
-static Constant *computePointerDifference(const DataLayout &TD, +static Constant *computePointerDifference(const DataLayout *TD, Value *LHS, Value *RHS) { Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; // If LHS and RHS are not related via constant offsets to the same base // value, there is nothing we can do here. @@ -852,9 +828,9 @@ static Value *SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, return W; // Variations on GEP(base, I, ...) - GEP(base, i, ...) -> GEP(null, I-i, ...). - if (Q.TD && match(Op0, m_PtrToInt(m_Value(X))) && + if (match(Op0, m_PtrToInt(m_Value(X))) && match(Op1, m_PtrToInt(m_Value(Y)))) - if (Constant *Result = computePointerDifference(*Q.TD, X, Y)) + if (Constant *Result = computePointerDifference(Q.TD, X, Y)) return ConstantExpr::getIntegerCast(Result, Op0->getType(), true); // Mul distributes over Sub. Try some generic simplifications based on this. @@ -886,6 +862,112 @@ Value *llvm::SimplifySubInst(Value *Op0, Value *Op1, bool isNSW, bool isNUW, RecursionLimit); } +/// Given operands for an FAdd, see if we can fold the result. If not, this +/// returns null. +static Value *SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast(Op0)) { + if (Constant *CRHS = dyn_cast(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FAdd, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // fadd X, -0 ==> X + if (match(Op1, m_NegZero())) + return Op0; + + // fadd X, 0 ==> X, when we know X is not -0 + if (match(Op1, m_Zero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0))) + return Op0; + + // fadd [nnan ninf] X, (fsub [nnan ninf] 0, X) ==> 0 + // where nnan and ninf have to occur at least once somewhere in this + // expression + Value *SubOp = 0; + if (match(Op1, m_FSub(m_AnyZero(), m_Specific(Op0)))) + SubOp = Op1; + else if (match(Op0, m_FSub(m_AnyZero(), m_Specific(Op1)))) + SubOp = Op0; + if (SubOp) { + Instruction *FSub = cast(SubOp); + if ((FMF.noNaNs() || FSub->hasNoNaNs()) && + (FMF.noInfs() || FSub->hasNoInfs())) + return Constant::getNullValue(Op0->getType()); + } + + return 0; +} + +/// Given operands for an FSub, see if we can fold the result. If not, this +/// returns null. 
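// The signed-zero guards in the FAdd fold above and the FSub fold below are
// needed because the identity does not hold bit-for-bit when X is -0.0:
// under the default IEEE-754 rounding mode, -0.0 + 0.0 evaluates to +0.0,
// so "fadd X, 0.0" may flip the sign of a zero unless nsz is in effect or X
// is known not to be -0.0. A standalone check of that fact (illustrative
// only, not part of this patch):
//
//   #include <cassert>
//   #include <cmath>
//   int main() {
//     double x = -0.0;
//     assert(std::signbit(x));        // x is negative zero
//     assert(!std::signbit(x + 0.0)); // -0.0 + 0.0 == +0.0
//     return 0;
//   }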
+static Value *SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const Query &Q, unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast(Op0)) { + if (Constant *CRHS = dyn_cast(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FSub, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + } + + // fsub X, 0 ==> X + if (match(Op1, m_Zero())) + return Op0; + + // fsub X, -0 ==> X, when we know X is not -0 + if (match(Op1, m_NegZero()) && + (FMF.noSignedZeros() || CannotBeNegativeZero(Op0))) + return Op0; + + // fsub 0, (fsub -0.0, X) ==> X + Value *X; + if (match(Op0, m_AnyZero())) { + if (match(Op1, m_FSub(m_NegZero(), m_Value(X)))) + return X; + if (FMF.noSignedZeros() && match(Op1, m_FSub(m_AnyZero(), m_Value(X)))) + return X; + } + + // fsub nnan ninf x, x ==> 0.0 + if (FMF.noNaNs() && FMF.noInfs() && Op0 == Op1) + return Constant::getNullValue(Op0->getType()); + + return 0; +} + +/// Given the operands for an FMul, see if we can fold the result +static Value *SimplifyFMulInst(Value *Op0, Value *Op1, + FastMathFlags FMF, + const Query &Q, + unsigned MaxRecurse) { + if (Constant *CLHS = dyn_cast(Op0)) { + if (Constant *CRHS = dyn_cast(Op1)) { + Constant *Ops[] = { CLHS, CRHS }; + return ConstantFoldInstOperands(Instruction::FMul, CLHS->getType(), + Ops, Q.TD, Q.TLI); + } + + // Canonicalize the constant to the RHS. + std::swap(Op0, Op1); + } + + // fmul X, 1.0 ==> X + if (match(Op1, m_FPOne())) + return Op0; + + // fmul nnan nsz X, 0 ==> 0 + if (FMF.noNaNs() && FMF.noSignedZeros() && match(Op1, m_AnyZero())) + return Op1; + + return 0; +} + /// SimplifyMulInst - Given operands for a Mul, see if we can /// fold the result. If not, this returns null. static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, @@ -951,6 +1033,26 @@ static Value *SimplifyMulInst(Value *Op0, Value *Op1, const Query &Q, return 0; } +Value *llvm::SimplifyFAddInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFAddInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + +Value *llvm::SimplifyFSubInst(Value *Op0, Value *Op1, FastMathFlags FMF, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFSubInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + +Value *llvm::SimplifyFMulInst(Value *Op0, Value *Op1, + FastMathFlags FMF, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyFMulInst(Op0, Op1, FMF, Query (TD, TLI, DT), RecursionLimit); +} + Value *llvm::SimplifyMulInst(Value *Op0, Value *Op1, const DataLayout *TD, const TargetLibraryInfo *TLI, const DominatorTree *DT) { @@ -1364,9 +1466,9 @@ static Value *SimplifyAndInst(Value *Op0, Value *Op1, const Query &Q, // A & (-A) = A if A is a power of two or zero. if (match(Op0, m_Neg(m_Specific(Op1))) || match(Op1, m_Neg(m_Specific(Op0)))) { - if (isPowerOfTwo(Op0, Q.TD, /*OrZero*/true)) + if (isKnownToBeAPowerOfTwo(Op0, /*OrZero*/true)) return Op0; - if (isPowerOfTwo(Op1, Q.TD, /*OrZero*/true)) + if (isKnownToBeAPowerOfTwo(Op1, /*OrZero*/true)) return Op1; } @@ -1591,9 +1693,48 @@ static Value *ExtractEquivalentCondition(Value *V, CmpInst::Predicate Pred, return 0; } -static Constant *computePointerICmp(const DataLayout &TD, +// A significant optimization not implemented here is assuming that alloca +// addresses are not equal to incoming argument values. 
They don't *alias*, +// as we say, but that doesn't mean they aren't equal, so we take a +// conservative approach. +// +// This is inspired in part by C++11 5.10p1: +// "Two pointers of the same type compare equal if and only if they are both +// null, both point to the same function, or both represent the same +// address." +// +// This is pretty permissive. +// +// It's also partly due to C11 6.5.9p6: +// "Two pointers compare equal if and only if both are null pointers, both are +// pointers to the same object (including a pointer to an object and a +// subobject at its beginning) or function, both are pointers to one past the +// last element of the same array object, or one is a pointer to one past the +// end of one array object and the other is a pointer to the start of a +// different array object that happens to immediately follow the first array +// object in the address space.) +// +// C11's version is more restrictive, however there's no reason why an argument +// couldn't be a one-past-the-end value for a stack object in the caller and be +// equal to the beginning of a stack object in the callee. +// +// If the C and C++ standards are ever made sufficiently restrictive in this +// area, it may be possible to update LLVM's semantics accordingly and reinstate +// this optimization. +static Constant *computePointerICmp(const DataLayout *TD, + const TargetLibraryInfo *TLI, CmpInst::Predicate Pred, Value *LHS, Value *RHS) { + // First, skip past any trivial no-ops. + LHS = LHS->stripPointerCasts(); + RHS = RHS->stripPointerCasts(); + + // A non-null pointer is not equal to a null pointer. + if (llvm::isKnownNonNull(LHS) && isa(RHS) && + (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE)) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + // We can only fold certain predicates on pointer comparisons. switch (Pred) { default: @@ -1616,19 +1757,83 @@ static Constant *computePointerICmp(const DataLayout &TD, break; } + // Strip off any constant offsets so that we can reason about them. + // It's tempting to use getUnderlyingObject or even just stripInBoundsOffsets + // here and compare base addresses like AliasAnalysis does, however there are + // numerous hazards. AliasAnalysis and its utilities rely on special rules + // governing loads and stores which don't apply to icmps. Also, AliasAnalysis + // doesn't need to guarantee pointer inequality when it says NoAlias. Constant *LHSOffset = stripAndComputeConstantOffsets(TD, LHS); - if (!LHSOffset) - return 0; Constant *RHSOffset = stripAndComputeConstantOffsets(TD, RHS); - if (!RHSOffset) - return 0; - // If LHS and RHS are not related via constant offsets to the same base - // value, there is nothing we can do here. - if (LHS != RHS) - return 0; + // If LHS and RHS are related via constant offsets to the same base + // value, we can replace it with an icmp which just compares the offsets. + if (LHS == RHS) + return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + + // Various optimizations for (in)equality comparisons. + if (Pred == CmpInst::ICMP_EQ || Pred == CmpInst::ICMP_NE) { + // Different non-empty allocations that exist at the same time have + // different addresses (if the program can tell). Global variables always + // exist, so they always exist during the lifetime of each other and all + // allocas. Two different allocas usually have different addresses... + // + // However, if there's an @llvm.stackrestore dynamically in between two + // allocas, they may have the same address. 
It's tempting to reduce the + // scope of the problem by only looking at *static* allocas here. That would + // cover the majority of allocas while significantly reducing the likelihood + // of having an @llvm.stackrestore pop up in the middle. However, it's not + // actually impossible for an @llvm.stackrestore to pop up in the middle of + // an entry block. Also, if we have a block that's not attached to a + // function, we can't tell if it's "static" under the current definition. + // Theoretically, this problem could be fixed by creating a new kind of + // instruction kind specifically for static allocas. Such a new instruction + // could be required to be at the top of the entry block, thus preventing it + // from being subject to a @llvm.stackrestore. Instcombine could even + // convert regular allocas into these special allocas. It'd be nifty. + // However, until then, this problem remains open. + // + // So, we'll assume that two non-empty allocas have different addresses + // for now. + // + // With all that, if the offsets are within the bounds of their allocations + // (and not one-past-the-end! so we can't use inbounds!), and their + // allocations aren't the same, the pointers are not equal. + // + // Note that it's not necessary to check for LHS being a global variable + // address, due to canonicalization and constant folding. + if (isa(LHS) && + (isa(RHS) || isa(RHS))) { + ConstantInt *LHSOffsetCI = dyn_cast(LHSOffset); + ConstantInt *RHSOffsetCI = dyn_cast(RHSOffset); + uint64_t LHSSize, RHSSize; + if (LHSOffsetCI && RHSOffsetCI && + getObjectSize(LHS, LHSSize, TD, TLI) && + getObjectSize(RHS, RHSSize, TD, TLI)) { + const APInt &LHSOffsetValue = LHSOffsetCI->getValue(); + const APInt &RHSOffsetValue = RHSOffsetCI->getValue(); + if (!LHSOffsetValue.isNegative() && + !RHSOffsetValue.isNegative() && + LHSOffsetValue.ult(LHSSize) && + RHSOffsetValue.ult(RHSSize)) { + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } - return ConstantExpr::getICmp(Pred, LHSOffset, RHSOffset); + // Repeat the above check but this time without depending on DataLayout + // or being able to compute a precise size. + if (!cast(LHS->getType())->isEmptyTy() && + !cast(RHS->getType())->isEmptyTy() && + LHSOffset->isNullValue() && + RHSOffset->isNullValue()) + return ConstantInt::get(GetCompareTy(LHS), + !CmpInst::isTrueWhenEqual(Pred)); + } + } + + // Otherwise, fail. + return 0; } /// SimplifyICmpInst - Given operands for an ICmpInst, see if we can @@ -1693,62 +1898,6 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS, } } - // icmp , - Different identified objects have - // different addresses (unless null), and what's more the address of an - // identified local is never equal to another argument (again, barring null). - // Note that generalizing to the case where LHS is a global variable address - // or null is pointless, since if both LHS and RHS are constants then we - // already constant folded the compare, and if only one of them is then we - // moved it to RHS already. - Value *LHSPtr = LHS->stripPointerCasts(); - Value *RHSPtr = RHS->stripPointerCasts(); - if (LHSPtr == RHSPtr) - return ConstantInt::get(ITy, CmpInst::isTrueWhenEqual(Pred)); - - // Be more aggressive about stripping pointer adjustments when checking a - // comparison of an alloca address to another object. We can rip off all - // inbounds GEP operations, even if they are variable. 
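// A worked example of the bounds-based inequality fold added above (the
// sizes and offsets are illustrative assumptions): comparing a pointer
// 4 bytes into a 16-byte alloca against a pointer to the start of an
// 8-byte global, both offsets are non-negative and strictly less than
// their objects' sizes (4 < 16 and 0 < 8), and the two allocations are
// distinct, so "icmp eq" between the pointers folds to false and
// "icmp ne" folds to true.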
-  LHSPtr = LHSPtr->stripInBoundsOffsets();
-  if (llvm::isIdentifiedObject(LHSPtr)) {
-    RHSPtr = RHSPtr->stripInBoundsOffsets();
-    if (llvm::isKnownNonNull(LHSPtr) || llvm::isKnownNonNull(RHSPtr)) {
-      // If both sides are different identified objects, they aren't equal
-      // unless they're null.
-      if (LHSPtr != RHSPtr && llvm::isIdentifiedObject(RHSPtr) &&
-          Pred == CmpInst::ICMP_EQ)
-        return ConstantInt::get(ITy, false);
-
-      // A local identified object (alloca or noalias call) can't equal any
-      // incoming argument, unless they're both null or they belong to
-      // different functions. The latter happens during inlining.
-      if (Instruction *LHSInst = dyn_cast<Instruction>(LHSPtr))
-        if (Argument *RHSArg = dyn_cast<Argument>(RHSPtr))
-          if (LHSInst->getParent()->getParent() == RHSArg->getParent() &&
-              Pred == CmpInst::ICMP_EQ)
-            return ConstantInt::get(ITy, false);
-    }
-
-    // Assume that the constant null is on the right.
-    if (llvm::isKnownNonNull(LHSPtr) && isa<ConstantPointerNull>(RHSPtr)) {
-      if (Pred == CmpInst::ICMP_EQ)
-        return ConstantInt::get(ITy, false);
-      else if (Pred == CmpInst::ICMP_NE)
-        return ConstantInt::get(ITy, true);
-    }
-  } else if (Argument *LHSArg = dyn_cast<Argument>(LHSPtr)) {
-    RHSPtr = RHSPtr->stripInBoundsOffsets();
-    // An alloca can't be equal to an argument unless they come from separate
-    // functions via inlining.
-    if (AllocaInst *RHSInst = dyn_cast<AllocaInst>(RHSPtr)) {
-      if (LHSArg->getParent() == RHSInst->getParent()->getParent()) {
-        if (Pred == CmpInst::ICMP_EQ)
-          return ConstantInt::get(ITy, false);
-        else if (Pred == CmpInst::ICMP_NE)
-          return ConstantInt::get(ITy, true);
-      }
-    }
-  }
-
-  // If we are comparing with zero then try hard since this is a common case.
-  if (match(RHS, m_Zero())) {
-    bool LHSKnownNonNegative, LHSKnownNegative;
@@ -2375,8 +2524,8 @@ static Value *SimplifyICmpInst(unsigned Predicate, Value *LHS, Value *RHS,
   // Simplify comparisons of related pointers using a powerful, recursive
   // GEP-walk when we have target data available..
- if (Q.TD && LHS->getType()->isPointerTy() && RHS->getType()->isPointerTy()) - if (Constant *C = computePointerICmp(*Q.TD, Pred, LHS, RHS)) + if (LHS->getType()->isPointerTy()) + if (Constant *C = computePointerICmp(Q.TD, Q.TLI, Pred, LHS, RHS)) return C; if (GetElementPtrInst *GLHS = dyn_cast(LHS)) { @@ -2697,10 +2846,18 @@ static Value *SimplifyBinOp(unsigned Opcode, Value *LHS, Value *RHS, case Instruction::Add: return SimplifyAddInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, Q, MaxRecurse); + case Instruction::FAdd: + return SimplifyFAddInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::Sub: return SimplifySubInst(LHS, RHS, /*isNSW*/false, /*isNUW*/false, Q, MaxRecurse); + case Instruction::FSub: + return SimplifyFSubInst(LHS, RHS, FastMathFlags(), Q, MaxRecurse); + case Instruction::Mul: return SimplifyMulInst (LHS, RHS, Q, MaxRecurse); + case Instruction::FMul: + return SimplifyFMulInst (LHS, RHS, FastMathFlags(), Q, MaxRecurse); case Instruction::SDiv: return SimplifySDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::UDiv: return SimplifyUDivInst(LHS, RHS, Q, MaxRecurse); case Instruction::FDiv: return SimplifyFDivInst(LHS, RHS, Q, MaxRecurse); @@ -2768,14 +2925,88 @@ Value *llvm::SimplifyCmpInst(unsigned Predicate, Value *LHS, Value *RHS, RecursionLimit); } -static Value *SimplifyCallInst(CallInst *CI, const Query &) { - // call undef -> undef - if (isa(CI->getCalledValue())) - return UndefValue::get(CI->getType()); +static bool IsIdempotent(Intrinsic::ID ID) { + switch (ID) { + default: return false; + + // Unary idempotent: f(f(x)) = f(x) + case Intrinsic::fabs: + case Intrinsic::floor: + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: + return true; + } +} + +template +static Value *SimplifyIntrinsic(Intrinsic::ID IID, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + // Perform idempotent optimizations + if (!IsIdempotent(IID)) + return 0; + + // Unary Ops + if (std::distance(ArgBegin, ArgEnd) == 1) + if (IntrinsicInst *II = dyn_cast(*ArgBegin)) + if (II->getIntrinsicID() == IID) + return II; return 0; } +template +static Value *SimplifyCall(Value *V, IterTy ArgBegin, IterTy ArgEnd, + const Query &Q, unsigned MaxRecurse) { + Type *Ty = V->getType(); + if (PointerType *PTy = dyn_cast(Ty)) + Ty = PTy->getElementType(); + FunctionType *FTy = cast(Ty); + + // call undef -> undef + if (isa(V)) + return UndefValue::get(FTy->getReturnType()); + + Function *F = dyn_cast(V); + if (!F) + return 0; + + if (unsigned IID = F->getIntrinsicID()) + if (Value *Ret = + SimplifyIntrinsic((Intrinsic::ID) IID, ArgBegin, ArgEnd, Q, MaxRecurse)) + return Ret; + + if (!canConstantFoldCallTo(F)) + return 0; + + SmallVector ConstantArgs; + ConstantArgs.reserve(ArgEnd - ArgBegin); + for (IterTy I = ArgBegin, E = ArgEnd; I != E; ++I) { + Constant *C = dyn_cast(*I); + if (!C) + return 0; + ConstantArgs.push_back(C); + } + + return ConstantFoldCall(F, ConstantArgs, Q.TLI); +} + +Value *llvm::SimplifyCall(Value *V, User::op_iterator ArgBegin, + User::op_iterator ArgEnd, const DataLayout *TD, + const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCall(V, ArgBegin, ArgEnd, Query(TD, TLI, DT), + RecursionLimit); +} + +Value *llvm::SimplifyCall(Value *V, ArrayRef Args, + const DataLayout *TD, const TargetLibraryInfo *TLI, + const DominatorTree *DT) { + return ::SimplifyCall(V, Args.begin(), Args.end(), Query(TD, TLI, DT), + RecursionLimit); +} + /// SimplifyInstruction - See if we can compute 
a simplified version of this /// instruction. If not, this returns null. Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, @@ -2787,18 +3018,30 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, default: Result = ConstantFoldInstruction(I, TD, TLI); break; + case Instruction::FAdd: + Result = SimplifyFAddInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Add: Result = SimplifyAddInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), TD, TLI, DT); break; + case Instruction::FSub: + Result = SimplifyFSubInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Sub: Result = SimplifySubInst(I->getOperand(0), I->getOperand(1), cast(I)->hasNoSignedWrap(), cast(I)->hasNoUnsignedWrap(), TD, TLI, DT); break; + case Instruction::FMul: + Result = SimplifyFMulInst(I->getOperand(0), I->getOperand(1), + I->getFastMathFlags(), TD, TLI, DT); + break; case Instruction::Mul: Result = SimplifyMulInst(I->getOperand(0), I->getOperand(1), TD, TLI, DT); break; @@ -2872,9 +3115,12 @@ Value *llvm::SimplifyInstruction(Instruction *I, const DataLayout *TD, case Instruction::PHI: Result = SimplifyPHINode(cast(I), Query (TD, TLI, DT)); break; - case Instruction::Call: - Result = SimplifyCallInst(cast(I), Query (TD, TLI, DT)); + case Instruction::Call: { + CallSite CS(cast(I)); + Result = SimplifyCall(CS.getCalledValue(), CS.arg_begin(), CS.arg_end(), + TD, TLI, DT); break; + } case Instruction::Trunc: Result = SimplifyTruncInst(I->getOperand(0), I->getType(), TD, TLI, DT); break; diff --git a/lib/Analysis/Interval.cpp b/lib/Analysis/Interval.cpp index ca9cdcaf2464..26a0322407ec 100644 --- a/lib/Analysis/Interval.cpp +++ b/lib/Analysis/Interval.cpp @@ -13,7 +13,7 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Interval.h" -#include "llvm/BasicBlock.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/Support/CFG.h" #include "llvm/Support/raw_ostream.h" #include diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 2b87d80d3732..66b5e852c02f 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -13,23 +13,22 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "lazy-value-info" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/LazyValueInfo.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/ConstantFolding.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" #include "llvm/Support/CFG.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLibraryInfo.h" #include #include using namespace llvm; diff --git a/lib/Analysis/LibCallAliasAnalysis.cpp b/lib/Analysis/LibCallAliasAnalysis.cpp index efb722bb97c4..fefa51660f92 
100644 --- a/lib/Analysis/LibCallAliasAnalysis.cpp +++ b/lib/Analysis/LibCallAliasAnalysis.cpp @@ -12,9 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LibCallAliasAnalysis.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/LibCallSemantics.h" -#include "llvm/Function.h" +#include "llvm/Analysis/Passes.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Analysis/LibCallSemantics.cpp b/lib/Analysis/LibCallSemantics.cpp index 81b0f46f3740..0592ccb26c12 100644 --- a/lib/Analysis/LibCallSemantics.cpp +++ b/lib/Analysis/LibCallSemantics.cpp @@ -15,7 +15,7 @@ #include "llvm/Analysis/LibCallSemantics.h" #include "llvm/ADT/StringMap.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" using namespace llvm; /// getMap - This impl pointer in ~LibCallInfo is actually a StringMap. This diff --git a/lib/Analysis/Lint.cpp b/lib/Analysis/Lint.cpp index 6d6d580ed19a..9393508a9e67 100644 --- a/lib/Analysis/Lint.cpp +++ b/lib/Analysis/Lint.cpp @@ -34,26 +34,26 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/Lint.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/Lint.h" +#include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/Loads.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/InstVisitor.h" #include "llvm/Pass.h" #include "llvm/PassManager.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetLibraryInfo.h" using namespace llvm; namespace { @@ -412,51 +412,49 @@ void Lint::visitMemoryReference(Instruction &I, } // Check for buffer overflows and misalignment. - if (TD) { - // Only handles memory references that read/write something simple like an - // alloca instruction or a global variable. - int64_t Offset = 0; - if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, *TD)) { - // OK, so the access is to a constant offset from Ptr. Check that Ptr is - // something we can handle and if so extract the size of this base object - // along with its alignment. - uint64_t BaseSize = AliasAnalysis::UnknownSize; - unsigned BaseAlign = 0; - - if (AllocaInst *AI = dyn_cast(Base)) { - Type *ATy = AI->getAllocatedType(); - if (!AI->isArrayAllocation() && ATy->isSized()) - BaseSize = TD->getTypeAllocSize(ATy); - BaseAlign = AI->getAlignment(); - if (BaseAlign == 0 && ATy->isSized()) - BaseAlign = TD->getABITypeAlignment(ATy); - } else if (GlobalVariable *GV = dyn_cast(Base)) { - // If the global may be defined differently in another compilation unit - // then don't warn about funky memory accesses. 
- if (GV->hasDefinitiveInitializer()) { - Type *GTy = GV->getType()->getElementType(); - if (GTy->isSized()) - BaseSize = TD->getTypeAllocSize(GTy); - BaseAlign = GV->getAlignment(); - if (BaseAlign == 0 && GTy->isSized()) - BaseAlign = TD->getABITypeAlignment(GTy); - } + // Only handles memory references that read/write something simple like an + // alloca instruction or a global variable. + int64_t Offset = 0; + if (Value *Base = GetPointerBaseWithConstantOffset(Ptr, Offset, TD)) { + // OK, so the access is to a constant offset from Ptr. Check that Ptr is + // something we can handle and if so extract the size of this base object + // along with its alignment. + uint64_t BaseSize = AliasAnalysis::UnknownSize; + unsigned BaseAlign = 0; + + if (AllocaInst *AI = dyn_cast(Base)) { + Type *ATy = AI->getAllocatedType(); + if (TD && !AI->isArrayAllocation() && ATy->isSized()) + BaseSize = TD->getTypeAllocSize(ATy); + BaseAlign = AI->getAlignment(); + if (TD && BaseAlign == 0 && ATy->isSized()) + BaseAlign = TD->getABITypeAlignment(ATy); + } else if (GlobalVariable *GV = dyn_cast(Base)) { + // If the global may be defined differently in another compilation unit + // then don't warn about funky memory accesses. + if (GV->hasDefinitiveInitializer()) { + Type *GTy = GV->getType()->getElementType(); + if (TD && GTy->isSized()) + BaseSize = TD->getTypeAllocSize(GTy); + BaseAlign = GV->getAlignment(); + if (TD && BaseAlign == 0 && GTy->isSized()) + BaseAlign = TD->getABITypeAlignment(GTy); } - - // Accesses from before the start or after the end of the object are not - // defined. - Assert1(Size == AliasAnalysis::UnknownSize || - BaseSize == AliasAnalysis::UnknownSize || - (Offset >= 0 && Offset + Size <= BaseSize), - "Undefined behavior: Buffer overflow", &I); - - // Accesses that say that the memory is more aligned than it is are not - // defined. - if (Align == 0 && Ty && Ty->isSized()) - Align = TD->getABITypeAlignment(Ty); - Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), - "Undefined behavior: Memory reference address is misaligned", &I); } + + // Accesses from before the start or after the end of the object are not + // defined. + Assert1(Size == AliasAnalysis::UnknownSize || + BaseSize == AliasAnalysis::UnknownSize || + (Offset >= 0 && Offset + Size <= BaseSize), + "Undefined behavior: Buffer overflow", &I); + + // Accesses that say that the memory is more aligned than it is are not + // defined. 
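// A worked example of the overflow check above (types chosen purely for
// illustration): a store of an i64 (Size = 8) at offset 0 into an alloca of
// a single i32 (BaseSize = 4) satisfies Offset >= 0 but not
// Offset + Size <= BaseSize, since 0 + 8 > 4, so the pass reports
// "Undefined behavior: Buffer overflow" for that instruction.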
+ if (TD && Align == 0 && Ty && Ty->isSized()) + Align = TD->getABITypeAlignment(Ty); + Assert1(!BaseAlign || Align <= MinAlign(BaseAlign, Offset), + "Undefined behavior: Memory reference address is misaligned", &I); } } diff --git a/lib/Analysis/Loads.cpp b/lib/Analysis/Loads.cpp index 73aa8b49cda5..0902a39a9f81 100644 --- a/lib/Analysis/Loads.cpp +++ b/lib/Analysis/Loads.cpp @@ -13,12 +13,13 @@ #include "llvm/Analysis/Loads.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/DataLayout.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" using namespace llvm; /// AreEquivalentAddressValues - Test if A and B will obviously have the same @@ -48,48 +49,18 @@ static bool AreEquivalentAddressValues(const Value *A, const Value *B) { return false; } -/// getUnderlyingObjectWithOffset - Strip off up to MaxLookup GEPs and -/// bitcasts to get back to the underlying object being addressed, keeping -/// track of the offset in bytes from the GEPs relative to the result. -/// This is closely related to GetUnderlyingObject but is located -/// here to avoid making VMCore depend on DataLayout. -static Value *getUnderlyingObjectWithOffset(Value *V, const DataLayout *TD, - uint64_t &ByteOffset, - unsigned MaxLookup = 6) { - if (!V->getType()->isPointerTy()) - return V; - for (unsigned Count = 0; MaxLookup == 0 || Count < MaxLookup; ++Count) { - if (GEPOperator *GEP = dyn_cast(V)) { - if (!GEP->hasAllConstantIndices()) - return V; - SmallVector Indices(GEP->op_begin() + 1, GEP->op_end()); - ByteOffset += TD->getIndexedOffset(GEP->getPointerOperandType(), - Indices); - V = GEP->getPointerOperand(); - } else if (Operator::getOpcode(V) == Instruction::BitCast) { - V = cast(V)->getOperand(0); - } else if (GlobalAlias *GA = dyn_cast(V)) { - if (GA->mayBeOverridden()) - return V; - V = GA->getAliasee(); - } else { - return V; - } - assert(V->getType()->isPointerTy() && "Unexpected operand type!"); - } - return V; -} - /// isSafeToLoadUnconditionally - Return true if we know that executing a load /// from this value cannot trap. If it is not obviously safe to load from the /// specified pointer, we do a quick local scan of the basic block containing /// ScanFrom, to determine if the address is already accessed. bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, unsigned Align, const DataLayout *TD) { - uint64_t ByteOffset = 0; + int64_t ByteOffset = 0; Value *Base = V; - if (TD) - Base = getUnderlyingObjectWithOffset(V, TD, ByteOffset); + Base = GetPointerBaseWithConstantOffset(V, ByteOffset, TD); + + if (ByteOffset < 0) // out of bounds + return false; Type *BaseType = 0; unsigned BaseAlign = 0; @@ -97,10 +68,10 @@ bool llvm::isSafeToLoadUnconditionally(Value *V, Instruction *ScanFrom, // An alloca is safe to load from as load as it is suitably aligned. BaseType = AI->getAllocatedType(); BaseAlign = AI->getAlignment(); - } else if (const GlobalValue *GV = dyn_cast(Base)) { + } else if (const GlobalVariable *GV = dyn_cast(Base)) { // Global variables are safe to load from but their size cannot be // guaranteed if they are overridden. 
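// A small example of the negative-offset rejection above (assumed input,
// not from this patch): a pointer formed 4 bytes before an alloca, e.g.
// via "getelementptr i32* %buf, i64 -1", strips back to the alloca with
// ByteOffset = -4, so the load is treated as out of bounds and reported
// unsafe rather than being considered covered by the alloca's size.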
- if (!isa(GV) && !GV->mayBeOverridden()) { + if (!GV->mayBeOverridden()) { BaseType = GV->getType()->getElementType(); BaseAlign = GV->getAlignment(); } diff --git a/lib/Analysis/LoopInfo.cpp b/lib/Analysis/LoopInfo.cpp index 8341f9d83055..f1ad6506e4ba 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -15,18 +15,19 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/LoopInfo.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Analysis/LoopIterator.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" #include using namespace llvm; @@ -213,14 +214,75 @@ bool Loop::isLoopSimplifyForm() const { /// isSafeToClone - Return true if the loop body is safe to clone in practice. /// Routines that reform the loop CFG and split edges often fail on indirectbr. bool Loop::isSafeToClone() const { - // Return false if any loop blocks contain indirectbrs. + // Return false if any loop blocks contain indirectbrs, or there are any calls + // to noduplicate functions. for (Loop::block_iterator I = block_begin(), E = block_end(); I != E; ++I) { - if (isa((*I)->getTerminator())) + if (isa((*I)->getTerminator())) { + return false; + } else if (const InvokeInst *II = dyn_cast((*I)->getTerminator())) { + if (II->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + + for (BasicBlock::iterator BI = (*I)->begin(), BE = (*I)->end(); BI != BE; ++BI) { + if (const CallInst *CI = dyn_cast(BI)) { + if (CI->hasFnAttr(Attribute::NoDuplicate)) + return false; + } + } + } + return true; +} + +bool Loop::isAnnotatedParallel() const { + + BasicBlock *latch = getLoopLatch(); + if (latch == NULL) + return false; + + MDNode *desiredLoopIdMetadata = + latch->getTerminator()->getMetadata("llvm.loop.parallel"); + + if (!desiredLoopIdMetadata) return false; + + // The loop branch contains the parallel loop metadata. In order to ensure + // that any parallel-loop-unaware optimization pass hasn't added loop-carried + // dependencies (thus converted the loop back to a sequential loop), check + // that all the memory instructions in the loop contain parallelism metadata + // that point to the same unique "loop id metadata" the loop branch does. + for (block_iterator BB = block_begin(), BE = block_end(); BB != BE; ++BB) { + for (BasicBlock::iterator II = (*BB)->begin(), EE = (*BB)->end(); + II != EE; II++) { + + if (!II->mayReadOrWriteMemory()) + continue; + + if (!II->getMetadata("llvm.mem.parallel_loop_access")) + return false; + + // The memory instruction can refer to the loop identifier metadata + // directly or indirectly through another list metadata (in case of + // nested parallel loops). The loop identifier metadata refers to + // itself so we can check both cases with the same routine. 
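// A minimal sketch of the metadata shape this walk expects (the IR below is
// an illustrative assumption, not taken from this patch): the latch branch
// carries the self-referential loop id, and every memory access inside the
// loop points back at that same id.
//
//   br i1 %exitcond, label %for.end, label %for.body, !llvm.loop.parallel !0
//   ...
//   %v = load i32* %p, !llvm.mem.parallel_loop_access !0
//   store i32 %v, i32* %q, !llvm.mem.parallel_loop_access !0
//   ...
//   !0 = metadata !{metadata !0}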
+ MDNode *loopIdMD = + dyn_cast(II->getMetadata("llvm.mem.parallel_loop_access")); + bool loopIdMDFound = false; + for (unsigned i = 0, e = loopIdMD->getNumOperands(); i < e; ++i) { + if (loopIdMD->getOperand(i) == desiredLoopIdMetadata) { + loopIdMDFound = true; + break; + } + } + + if (!loopIdMDFound) + return false; + } } return true; } + /// hasDedicatedExits - Return true if no exit block for the loop /// has a predecessor that is outside the loop. bool Loop::hasDedicatedExits() const { diff --git a/lib/Analysis/MemDepPrinter.cpp b/lib/Analysis/MemDepPrinter.cpp index 8578a63bee1f..d26aaf1b9048 100644 --- a/lib/Analysis/MemDepPrinter.cpp +++ b/lib/Analysis/MemDepPrinter.cpp @@ -10,15 +10,15 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/LLVMContext.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CallSite.h" -#include "llvm/Support/InstIterator.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SetVector.h" using namespace llvm; namespace { diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index 0a539fe75825..d490d5419f75 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -8,24 +8,24 @@ //===----------------------------------------------------------------------===// // // This family of functions identifies calls to builtin functions that allocate -// or free memory. +// or free memory. // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "memory-builtins" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/MemoryBuiltins.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Transforms/Utils/Local.h" using namespace llvm; @@ -88,6 +88,10 @@ static Function *getCalledFunction(const Value *V, bool LookThroughBitCast) { static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, const TargetLibraryInfo *TLI, bool LookThroughBitCast = false) { + // Skip intrinsics + if (isa(V)) + return 0; + Function *Callee = getCalledFunction(V, LookThroughBitCast); if (!Callee) return 0; @@ -132,7 +136,7 @@ static const AllocFnsTy *getAllocationData(const Value *V, AllocType AllocTy, static bool hasNoAliasAttr(const Value *V, bool LookThroughBitCast) { ImmutableCallSite CS(LookThroughBitCast ? 
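isAnnotatedParallel relies on the loop identifier metadata being self-referential: an access's llvm.mem.parallel_loop_access node either is the loop id itself or is a list of loop ids (for nested parallel loops), so a single scan over the operands covers both forms. A rough standard-C++ analogue of that membership test (Node is a stand-in, not the LLVM MDNode API):

    #include <algorithm>
    #include <vector>

    // A "metadata node" here is just an operand list. Because a loop id lists
    // itself as an operand, scanning the operands of whatever the access points
    // at finds the desired loop id in both the direct and the nested case.
    struct Node { std::vector<const Node *> Operands; };

    static bool referencesLoopId(const Node *AccessMD, const Node *LoopId) {
      return std::find(AccessMD->Operands.begin(), AccessMD->Operands.end(),
                       LoopId) != AccessMD->Operands.end();
    }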
V->stripPointerCasts() : V); - return CS && CS.hasFnAttr(Attributes::NoAlias); + return CS && CS.hasFnAttr(Attribute::NoAlias); } @@ -194,12 +198,12 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, const TargetLibraryInfo *TLI, bool LookThroughSExt = false) { if (!CI) - return NULL; + return 0; // The size of the malloc's result type must be known to determine array size. Type *T = getMallocAllocatedType(CI, TLI); if (!T || !T->isSized() || !TD) - return NULL; + return 0; unsigned ElementSize = TD->getTypeAllocSize(T); if (StructType *ST = dyn_cast(T)) @@ -208,15 +212,15 @@ static Value *computeArraySize(const CallInst *CI, const DataLayout *TD, // If malloc call's arg can be determined to be a multiple of ElementSize, // return the multiple. Otherwise, return NULL. Value *MallocArg = CI->getArgOperand(0); - Value *Multiple = NULL; + Value *Multiple = 0; if (ComputeMultiple(MallocArg, ElementSize, Multiple, LookThroughSExt)) return Multiple; - return NULL; + return 0; } -/// isArrayMalloc - Returns the corresponding CallInst if the instruction +/// isArrayMalloc - Returns the corresponding CallInst if the instruction /// is a call to malloc whose array size can be determined and the array size /// is not constant 1. Otherwise, return NULL. const CallInst *llvm::isArrayMalloc(const Value *I, @@ -225,12 +229,12 @@ const CallInst *llvm::isArrayMalloc(const Value *I, const CallInst *CI = extractMallocCall(I, TLI); Value *ArraySize = computeArraySize(CI, TD, TLI); - if (ArraySize && - ArraySize != ConstantInt::get(CI->getArgOperand(0)->getType(), 1)) - return CI; + if (ConstantInt *ConstSize = dyn_cast_or_null(ArraySize)) + if (ConstSize->isOne()) + return CI; // CI is a non-array malloc or we can't figure out that it is an array malloc. - return NULL; + return 0; } /// getMallocType - Returns the PointerType resulting from the malloc call. @@ -241,8 +245,8 @@ const CallInst *llvm::isArrayMalloc(const Value *I, PointerType *llvm::getMallocType(const CallInst *CI, const TargetLibraryInfo *TLI) { assert(isMallocLikeFn(CI, TLI) && "getMallocType and not malloc call"); - - PointerType *MallocType = NULL; + + PointerType *MallocType = 0; unsigned NumOfBitCastUses = 0; // Determine if CallInst has a bitcast use. @@ -262,7 +266,7 @@ PointerType *llvm::getMallocType(const CallInst *CI, return cast(CI->getType()); // Type could not be determined. - return NULL; + return 0; } /// getMallocAllocatedType - Returns the Type allocated by malloc call. @@ -273,10 +277,10 @@ PointerType *llvm::getMallocType(const CallInst *CI, Type *llvm::getMallocAllocatedType(const CallInst *CI, const TargetLibraryInfo *TLI) { PointerType *PT = getMallocType(CI, TLI); - return PT ? PT->getElementType() : NULL; + return PT ? PT->getElementType() : 0; } -/// getMallocArraySize - Returns the array size of a malloc call. If the +/// getMallocArraySize - Returns the array size of a malloc call. If the /// argument passed to malloc is a multiple of the size of the malloced type, /// then return that multiple. For non-array mallocs, the multiple is /// constant 1. 
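computeArraySize asks whether the byte count handed to malloc is provably a multiple of the allocated type's size; for the common constant-argument case this is just a divisibility check, sketched below (constantArraySize is an invented name; the real code uses ComputeMultiple so it can also handle non-constant arguments):

    #include <cstdint>

    // Returns the element count when Arg is an exact multiple of ElemSize,
    // otherwise 0 to signal "not an array malloc we can reason about".
    static uint64_t constantArraySize(uint64_t Arg, uint64_t ElemSize) {
      if (ElemSize == 0 || Arg % ElemSize != 0)
        return 0;
      return Arg / ElemSize;
    }
    // constantArraySize(64, 16) == 4, so malloc(64) of a 16-byte element type
    // covers four elements; constantArraySize(60, 16) == 0 (unknown).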
Otherwise, return NULL for mallocs whose array size cannot be @@ -300,7 +304,7 @@ const CallInst *llvm::extractCallocCall(const Value *I, /// isFreeCall - Returns non-null if the value is a call to the builtin free() const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { const CallInst *CI = dyn_cast(I); - if (!CI) + if (!CI || isa(CI)) return 0; Function *Callee = CI->getCalledFunction(); if (Callee == 0 || !Callee->isDeclaration()) @@ -317,7 +321,7 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { return 0; // Check free prototype. - // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin + // FIXME: workaround for PR5130, this will be obsolete when a nobuiltin // attribute will exist. FunctionType *FTy = Callee->getFunctionType(); if (!FTy->getReturnType()->isVoidTy()) @@ -360,6 +364,26 @@ bool llvm::getObjectSize(const Value *Ptr, uint64_t &Size, const DataLayout *TD, return true; } +/// \brief Compute the size of the underlying object pointed by Ptr. Returns +/// true and the object size in Size if successful, and false otherwise. +/// If RoundToAlign is true, then Size is rounded up to the aligment of allocas, +/// byval arguments, and global variables. +bool llvm::getUnderlyingObjectSize(const Value *Ptr, uint64_t &Size, + const DataLayout *TD, + const TargetLibraryInfo *TLI, + bool RoundToAlign) { + if (!TD) + return false; + + ObjectSizeOffsetVisitor Visitor(TD, TLI, Ptr->getContext(), RoundToAlign); + SizeOffsetType Data = Visitor.compute(const_cast(Ptr)); + if (!Visitor.knownSize(Data)) + return false; + + Size = Data.first.getZExtValue(); + return true; +} + STATISTIC(ObjectVisitorArgument, "Number of arguments with unsolved size and offset"); @@ -385,20 +409,29 @@ ObjectSizeOffsetVisitor::ObjectSizeOffsetVisitor(const DataLayout *TD, SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { V = V->stripPointerCasts(); - if (Instruction *I = dyn_cast(V)) { - // If we have already seen this instruction, bail out. Cycles can happen in - // unreachable code after constant propagation. - if (!SeenInsts.insert(I)) - return unknown(); + if (isa(V) || isa(V)) { + // Return cached value or insert unknown in cache if size of V was not + // computed yet in order to avoid recursions in PHis. 
+ std::pair CacheVal = + CacheMap.insert(std::make_pair(V, unknown())); + if (!CacheVal.second) + return CacheVal.first->second; + + SizeOffsetType Result; if (GEPOperator *GEP = dyn_cast(V)) - return visitGEPOperator(*GEP); - return visit(*I); + Result = visitGEPOperator(*GEP); + else + Result = visit(cast(*V)); + return CacheMap[V] = Result; } + if (Argument *A = dyn_cast(V)) return visitArgument(*A); if (ConstantPointerNull *P = dyn_cast(V)) return visitConstantPointerNull(*P); + if (GlobalAlias *GA = dyn_cast(V)) + return visitGlobalAlias(*GA); if (GlobalVariable *GV = dyn_cast(V)) return visitGlobalVariable(*GV); if (UndefValue *UV = dyn_cast(V)) @@ -406,8 +439,6 @@ SizeOffsetType ObjectSizeOffsetVisitor::compute(Value *V) { if (ConstantExpr *CE = dyn_cast(V)) { if (CE->getOpcode() == Instruction::IntToPtr) return unknown(); // clueless - if (CE->getOpcode() == Instruction::GetElementPtr) - return visitGEPOperator(cast(*CE)); } DEBUG(dbgs() << "ObjectSizeOffsetVisitor::compute() unhandled value: " << *V @@ -510,14 +541,19 @@ ObjectSizeOffsetVisitor::visitExtractValueInst(ExtractValueInst&) { SizeOffsetType ObjectSizeOffsetVisitor::visitGEPOperator(GEPOperator &GEP) { SizeOffsetType PtrData = compute(GEP.getPointerOperand()); - if (!bothKnown(PtrData) || !GEP.hasAllConstantIndices()) + APInt Offset(IntTyBits, 0); + if (!bothKnown(PtrData) || !GEP.accumulateConstantOffset(*TD, Offset)) return unknown(); - SmallVector Ops(GEP.idx_begin(), GEP.idx_end()); - APInt Offset(IntTyBits,TD->getIndexedOffset(GEP.getPointerOperandType(),Ops)); return std::make_pair(PtrData.first, PtrData.second + Offset); } +SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalAlias(GlobalAlias &GA) { + if (GA.mayBeOverridden()) + return unknown(); + return compute(GA.getAliasee()); +} + SizeOffsetType ObjectSizeOffsetVisitor::visitGlobalVariable(GlobalVariable &GV){ if (!GV.hasDefinitiveInitializer()) return unknown(); @@ -536,9 +572,21 @@ SizeOffsetType ObjectSizeOffsetVisitor::visitLoadInst(LoadInst&) { return unknown(); } -SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode&) { - // too complex to analyze statically. - return unknown(); +SizeOffsetType ObjectSizeOffsetVisitor::visitPHINode(PHINode &PHI) { + if (PHI.getNumIncomingValues() == 0) + return unknown(); + + SizeOffsetType Ret = compute(PHI.getIncomingValue(0)); + if (!bothKnown(Ret)) + return unknown(); + + // Verify that all PHI incoming pointers have the same size and offset. + for (unsigned i = 1, e = PHI.getNumIncomingValues(); i != e; ++i) { + SizeOffsetType EdgeData = compute(PHI.getIncomingValue(i)); + if (!bothKnown(EdgeData) || EdgeData != Ret) + return unknown(); + } + return Ret; } SizeOffsetType ObjectSizeOffsetVisitor::visitSelectInst(SelectInst &I) { @@ -619,6 +667,7 @@ SizeOffsetEvalType ObjectSizeOffsetEvaluator::compute_(Value *V) { } else if (isa(V) || (isa(V) && cast(V)->getOpcode() == Instruction::IntToPtr) || + isa(V) || isa(V)) { // ignore values where we cannot do more than what ObjectSizeVisitor can Result = unknown(); diff --git a/lib/Analysis/MemoryDependenceAnalysis.cpp b/lib/Analysis/MemoryDependenceAnalysis.cpp index 987289049455..2240e9de33eb 100644 --- a/lib/Analysis/MemoryDependenceAnalysis.cpp +++ b/lib/Analysis/MemoryDependenceAnalysis.cpp @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // This file implements an analysis that determines, for a given memory -// operation, what preceding memory operations it depends on. 
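The rewritten ObjectSizeOffsetVisitor::compute pre-seeds its cache with an unknown result before visiting a value, so a cycle through PHI nodes terminates instead of recursing forever, and visitPHINode only reports a size when every incoming value agrees. A self-contained sketch of that scheme (Node, SizeOffset and compute are illustrative stand-ins, not the LLVM types):

    #include <cstdint>
    #include <map>
    #include <utility>
    #include <vector>

    // A leaf node knows its (size, offset); a PHI merges its incoming nodes.
    // compute() inserts "unknown" into the cache before visiting, so a cycle of
    // PHIs bottoms out, and the placeholder is overwritten with the real answer.
    using SizeOffset = std::pair<int64_t, int64_t>;
    static const SizeOffset Unknown(-1, -1);

    struct Node {
      bool IsPhi = false;
      SizeOffset Leaf = Unknown;          // used when !IsPhi
      std::vector<const Node *> Incoming; // used when IsPhi
    };

    static SizeOffset compute(const Node *N,
                              std::map<const Node *, SizeOffset> &Cache) {
      auto Ins = Cache.insert(std::make_pair(N, Unknown));
      if (!Ins.second)
        return Ins.first->second;         // cached result, or Unknown on a cycle
      SizeOffset Result = Unknown;
      if (!N->IsPhi) {
        Result = N->Leaf;
      } else if (!N->Incoming.empty()) {
        Result = compute(N->Incoming[0], Cache);
        for (size_t I = 1, E = N->Incoming.size(); I != E; ++I)
          if (compute(N->Incoming[I], Cache) != Result)
            Result = Unknown;             // incoming values disagree
      }
      return Cache[N] = Result;
    }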
It builds on +// operation, what preceding memory operations it depends on. It builds on // alias analysis information, and tries to provide a lazy, caching interface to // a common kind of alias information query. // @@ -16,21 +16,21 @@ #define DEBUG_TYPE "memdep" #include "llvm/Analysis/MemoryDependenceAnalysis.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Function.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/MemoryBuiltins.h" #include "llvm/Analysis/PHITransAddr.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/Support/PredIteratorCache.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/PredIteratorCache.h" using namespace llvm; STATISTIC(NumCacheNonLocal, "Number of fully cached non-local responses"); @@ -52,7 +52,7 @@ STATISTIC(NumCacheCompleteNonLocalPtr, static const int BlockScanLimit = 500; char MemoryDependenceAnalysis::ID = 0; - + // Register this pass... INITIALIZE_PASS_BEGIN(MemoryDependenceAnalysis, "memdep", "Memory Dependence Analysis", false, true) @@ -99,7 +99,7 @@ bool MemoryDependenceAnalysis::runOnFunction(Function &) { /// RemoveFromReverseMap - This is a helper function that removes Val from /// 'Inst's set in ReverseMap. If the set becomes empty, remove Inst's entry. template -static void RemoveFromReverseMap(DenseMap > &ReverseMap, Instruction *Inst, KeyTy Val) { typename DenseMap >::iterator @@ -123,7 +123,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (LI->isUnordered()) { Loc = AA->getLocation(LI); return AliasAnalysis::Ref; - } else if (LI->getOrdering() == Monotonic) { + } + if (LI->getOrdering() == Monotonic) { Loc = AA->getLocation(LI); return AliasAnalysis::ModRef; } @@ -135,7 +136,8 @@ AliasAnalysis::ModRefResult GetLocation(const Instruction *Inst, if (SI->isUnordered()) { Loc = AA->getLocation(SI); return AliasAnalysis::Mod; - } else if (SI->getOrdering() == Monotonic) { + } + if (SI->getOrdering() == Monotonic) { Loc = AA->getLocation(SI); return AliasAnalysis::ModRef; } @@ -196,13 +198,13 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, // Walk backwards through the block, looking for dependencies while (ScanIt != BB->begin()) { // Limit the amount of scanning we do so we don't end up with quadratic - // running time on extreme testcases. + // running time on extreme testcases. --Limit; if (!Limit) return MemDepResult::getUnknown(); Instruction *Inst = --ScanIt; - + // If this inst is a memory op, get the pointer it accessed AliasAnalysis::Location Loc; AliasAnalysis::ModRefResult MR = GetLocation(Inst, Loc, AA); @@ -251,7 +253,7 @@ getCallSiteDependencyFrom(CallSite CS, bool isReadOnlyCall, /// /// MemLocBase, MemLocOffset are lazily computed here the first time the /// base/offs of memloc is needed. 
-static bool +static bool isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, const Value *&MemLocBase, int64_t &MemLocOffs, @@ -262,7 +264,7 @@ isLoadLoadClobberIfExtendedToFullWidth(const AliasAnalysis::Location &MemLoc, // If we haven't already computed the base/offset of MemLoc, do so now. if (MemLocBase == 0) - MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, *TD); + MemLocBase = GetPointerBaseWithConstantOffset(MemLoc.Ptr, MemLocOffs, TD); unsigned Size = MemoryDependenceAnalysis:: getLoadLoadClobberFullWidthSize(MemLocBase, MemLocOffs, MemLoc.Size, @@ -283,25 +285,31 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, const DataLayout &TD) { // We can only extend simple integer loads. if (!isa(LI->getType()) || !LI->isSimple()) return 0; - + + // Load widening is hostile to ThreadSanitizer: it may cause false positives + // or make the reports more cryptic (access sizes are wrong). + if (LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeThread)) + return 0; + // Get the base of this load. int64_t LIOffs = 0; - const Value *LIBase = - GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, TD); - + const Value *LIBase = + GetPointerBaseWithConstantOffset(LI->getPointerOperand(), LIOffs, &TD); + // If the two pointers are not based on the same pointer, we can't tell that // they are related. if (LIBase != MemLocBase) return 0; - + // Okay, the two values are based on the same pointer, but returned as // no-alias. This happens when we have things like two byte loads at "P+1" // and "P+3". Check to see if increasing the size of the "LI" load up to its // alignment (or the largest native integer type) will allow us to load all // the bits required by MemLoc. - + // If MemLoc is before LI, then no widening of LI will help us out. if (MemLocOffs < LIOffs) return 0; - + // Get the alignment of the load in bytes. We assume that it is safe to load // any legal integer up to this size without a problem. For example, if we're // looking at an i8 load on x86-32 that is known 1024 byte aligned, we can @@ -310,15 +318,15 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, unsigned LoadAlign = LI->getAlignment(); int64_t MemLocEnd = MemLocOffs+MemLocSize; - + // If no amount of rounding up will let MemLoc fit into LI, then bail out. if (LIOffs+LoadAlign < MemLocEnd) return 0; - + // This is the size of the load to try. Start with the next larger power of // two. unsigned NewLoadByteSize = LI->getType()->getPrimitiveSizeInBits()/8U; NewLoadByteSize = NextPowerOf2(NewLoadByteSize); - + while (1) { // If this load size is bigger than our known alignment or would not fit // into a native integer register, then we fail. @@ -327,8 +335,8 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, return 0; if (LIOffs+NewLoadByteSize > MemLocEnd && - LI->getParent()->getParent()->getFnAttributes(). - hasAttribute(Attributes::AddressSafety)) + LI->getParent()->getParent()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::SanitizeAddress)) // We will be reading past the location accessed by the original program. // While this is safe in a regular build, Address Safety analysis tools // may start reporting false warnings. So, don't do widening. 
@@ -337,7 +345,7 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, // If a load of this width would include all of MemLoc, then we succeed. if (LIOffs+NewLoadByteSize >= MemLocEnd) return NewLoadByteSize; - + NewLoadByteSize <<= 1; } } @@ -345,15 +353,23 @@ getLoadLoadClobberFullWidthSize(const Value *MemLocBase, int64_t MemLocOffs, /// getPointerDependencyFrom - Return the instruction on which a memory /// location depends. If isLoad is true, this routine ignores may-aliases with /// read-only operations. If isLoad is false, this routine ignores may-aliases -/// with reads from read-only locations. +/// with reads from read-only locations. If possible, pass the query +/// instruction as well; this function may take advantage of the metadata +/// annotated to the query instruction to refine the result. MemDepResult MemoryDependenceAnalysis:: -getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, - BasicBlock::iterator ScanIt, BasicBlock *BB) { +getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, + BasicBlock::iterator ScanIt, BasicBlock *BB, + Instruction *QueryInst) { const Value *MemLocBase = 0; int64_t MemLocOffset = 0; - unsigned Limit = BlockScanLimit; + bool isInvariantLoad = false; + if (isLoad && QueryInst) { + LoadInst *LI = dyn_cast(QueryInst); + if (LI && LI->getMetadata(LLVMContext::MD_invariant_load) != 0) + isInvariantLoad = true; + } // Walk backwards through the basic block, looking for dependencies. while (ScanIt != BB->begin()) { @@ -368,7 +384,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (IntrinsicInst *II = dyn_cast(Inst)) { // Debug intrinsics don't (and can't) cause dependences. if (isa(II)) continue; - + // If we reach a lifetime begin or end marker, then the query ends here // because the value is undefined. if (II->getIntrinsicID() == Intrinsic::lifetime_start) { @@ -392,10 +408,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(LI); AliasAnalysis::Location LoadLoc = AA->getLocation(LI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(LoadLoc, MemLoc); - + if (isLoad) { if (R == AliasAnalysis::NoAlias) { // If this is an over-aligned integer load (for example, @@ -409,10 +425,10 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, isLoadLoadClobberIfExtendedToFullWidth(MemLoc, MemLocBase, MemLocOffset, LI, TD)) return MemDepResult::getClobber(Inst); - + continue; } - + // Must aliased loads are defs of each other. if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); @@ -427,7 +443,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, if (R == AliasAnalysis::PartialAlias) return MemDepResult::getClobber(Inst); #endif - + // Random may-alias loads don't depend on each other without a // dependence. continue; @@ -444,7 +460,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Stores depend on may/must aliased loads. return MemDepResult::getDef(Inst); } - + if (StoreInst *SI = dyn_cast(Inst)) { // Atomic stores have complications involved. // FIXME: This is overly conservative. @@ -460,14 +476,16 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, // Ok, this store might clobber the query pointer. Check to see if it is // a must alias: in this case, we want to return this as a def. 
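getLoadLoadClobberFullWidthSize searches for the smallest power-of-two load width that would cover the queried location, bounded by the load's alignment and the widest legal integer; the ThreadSanitizer and AddressSanitizer checks above simply bail out of that search. A numeric sketch of the size search (widenedLoadSize and the MaxIntBytes limit are illustrative; the real code also rejects the case where MemLoc starts before the load):

    #include <cstdint>

    // Grow the candidate width in powers of two until [LIOffs, LIOffs + width)
    // reaches MemLocEnd, or give up if that would exceed the load's known
    // alignment or the widest native integer.
    static unsigned widenedLoadSize(int64_t LIOffs, unsigned LoadByteSize,
                                    unsigned LoadAlign, int64_t MemLocEnd,
                                    unsigned MaxIntBytes = 8) {
      if (LIOffs + (int64_t)LoadAlign < MemLocEnd)
        return 0;                       // no rounding up can ever reach MemLocEnd
      unsigned NewSize = 1;
      while (NewSize <= LoadByteSize)
        NewSize <<= 1;                  // start at the next larger power of two
      for (;;) {
        if (NewSize > LoadAlign || NewSize > MaxIntBytes)
          return 0;                     // would be misaligned or not a legal int
        if (LIOffs + (int64_t)NewSize >= MemLocEnd)
          return NewSize;               // this width includes all of MemLoc
        NewSize <<= 1;
      }
    }
    // e.g. a 1-byte load at offset 0 with 4-byte alignment can be widened to
    // cover a location ending at offset 4: widenedLoadSize(0, 1, 4, 4) == 4.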
AliasAnalysis::Location StoreLoc = AA->getLocation(SI); - + // If we found a pointer, check if it could be the same as our pointer. AliasAnalysis::AliasResult R = AA->alias(StoreLoc, MemLoc); - + if (R == AliasAnalysis::NoAlias) continue; if (R == AliasAnalysis::MustAlias) return MemDepResult::getDef(Inst); + if (isInvariantLoad) + continue; return MemDepResult::getClobber(Inst); } @@ -482,7 +500,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, const TargetLibraryInfo *TLI = AA->getTargetLibraryInfo(); if (isa(Inst) || isNoAliasFn(Inst, TLI)) { const Value *AccessPtr = GetUnderlyingObject(MemLoc.Ptr, TD); - + if (AccessPtr == Inst || AA->isMustAlias(Inst, AccessPtr)) return MemDepResult::getDef(Inst); // Be conservative if the accessed pointer may alias the allocation. @@ -516,7 +534,7 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, return MemDepResult::getClobber(Inst); } } - + // No dependence found. If this is the entry block of the function, it is // unknown, otherwise it is non-local. if (BB != &BB->getParent()->getEntryBlock()) @@ -528,25 +546,25 @@ getPointerDependencyFrom(const AliasAnalysis::Location &MemLoc, bool isLoad, /// depends. MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { Instruction *ScanPos = QueryInst; - + // Check for a cached result MemDepResult &LocalCache = LocalDeps[QueryInst]; - + // If the cached entry is non-dirty, just return it. Note that this depends // on MemDepResult's default constructing to 'dirty'. if (!LocalCache.isDirty()) return LocalCache; - + // Otherwise, if we have a dirty entry, we know we can start the scan at that // instruction, which may save us some work. if (Instruction *Inst = LocalCache.getInst()) { ScanPos = Inst; - + RemoveFromReverseMap(ReverseLocalDeps, Inst, QueryInst); } - + BasicBlock *QueryParent = QueryInst->getParent(); - + // Do the scan. if (BasicBlock::iterator(QueryInst) == QueryParent->begin()) { // No dependence found. If this is the entry block of the function, it is @@ -565,7 +583,7 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { isLoad |= II->getIntrinsicID() == Intrinsic::lifetime_start; LocalCache = getPointerDependencyFrom(MemLoc, isLoad, ScanPos, - QueryParent); + QueryParent, QueryInst); } else if (isa(QueryInst) || isa(QueryInst)) { CallSite QueryCS(QueryInst); bool isReadOnly = AA->onlyReadsMemory(QueryCS); @@ -575,11 +593,11 @@ MemDepResult MemoryDependenceAnalysis::getDependency(Instruction *QueryInst) { // Non-memory instruction. LocalCache = MemDepResult::getUnknown(); } - + // Remember the result! if (Instruction *I = LocalCache.getInst()) ReverseLocalDeps[I].insert(QueryInst); - + return LocalCache; } @@ -620,7 +638,7 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { /// the uncached case, this starts out as the set of predecessors we care /// about. SmallVector DirtyBlocks; - + if (!Cache.empty()) { // Okay, we have a cache entry. If we know it is not dirty, just return it // with no computation. @@ -628,17 +646,17 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { ++NumCacheNonLocal; return Cache; } - + // If we already have a partially computed set of results, scan them to // determine what is dirty, seeding our initial DirtyBlocks worklist. 
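With the new QueryInst parameter, a load carrying !invariant_load metadata can ignore clobbering stores: the loaded location is assumed not to change while it is dereferenceable, so only a must-aliased store (a def of the same location) is still reported. A compact sketch of that decision order (the enums are stand-ins for LLVM's AliasResult and MemDepResult):

    enum class Alias { No, May, Partial, Must };
    enum class Dep   { None, Def, Clobber };

    // Mirrors the store case above: no-alias means no dependence, must-alias is
    // a def, and anything in between clobbers unless the query load is invariant.
    static Dep storeDependence(Alias R, bool IsInvariantLoad) {
      if (R == Alias::No)
        return Dep::None;
      if (R == Alias::Must)
        return Dep::Def;
      return IsInvariantLoad ? Dep::None : Dep::Clobber;
    }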
for (NonLocalDepInfo::iterator I = Cache.begin(), E = Cache.end(); I != E; ++I) if (I->getResult().isDirty()) DirtyBlocks.push_back(I->getBB()); - + // Sort the cache so that we can do fast binary search lookups below. std::sort(Cache.begin(), Cache.end()); - + ++NumCacheDirtyNonLocal; //cerr << "CACHED CASE: " << DirtyBlocks.size() << " dirty: " // << Cache.size() << " cached: " << *QueryInst; @@ -649,45 +667,45 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { DirtyBlocks.push_back(*PI); ++NumUncacheNonLocal; } - + // isReadonlyCall - If this is a read-only call, we can be more aggressive. bool isReadonlyCall = AA->onlyReadsMemory(QueryCS); SmallPtrSet Visited; - + unsigned NumSortedEntries = Cache.size(); DEBUG(AssertSorted(Cache)); - + // Iterate while we still have blocks to update. while (!DirtyBlocks.empty()) { BasicBlock *DirtyBB = DirtyBlocks.back(); DirtyBlocks.pop_back(); - + // Already processed this block? if (!Visited.insert(DirtyBB)) continue; - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. DEBUG(AssertSorted(Cache, NumSortedEntries)); - NonLocalDepInfo::iterator Entry = + NonLocalDepInfo::iterator Entry = std::upper_bound(Cache.begin(), Cache.begin()+NumSortedEntries, NonLocalDepEntry(DirtyBB)); if (Entry != Cache.begin() && prior(Entry)->getBB() == DirtyBB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; - if (Entry != Cache.begin()+NumSortedEntries && + if (Entry != Cache.begin()+NumSortedEntries && Entry->getBB() == DirtyBB) { // If we already have an entry, and if it isn't already dirty, the block // is done. if (!Entry->getResult().isDirty()) continue; - + // Otherwise, remember this slot so we can update the value. ExistingResult = &*Entry; } - + // If the dirty entry has a pointer, start scanning from it so we don't have // to rescan the entire block. BasicBlock::iterator ScanPos = DirtyBB->end(); @@ -699,10 +717,10 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { QueryCS.getInstruction()); } } - + // Find out if this block has a local dependency for QueryInst. MemDepResult Dep; - + if (ScanPos != DirtyBB->begin()) { Dep = getCallSiteDependencyFrom(QueryCS, isReadonlyCall,ScanPos, DirtyBB); } else if (DirtyBB != &DirtyBB->getParent()->getEntryBlock()) { @@ -712,14 +730,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { } else { Dep = MemDepResult::getNonFuncLocal(); } - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache.push_back(NonLocalDepEntry(DirtyBB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the association! if (!Dep.isNonLocal()) { @@ -728,14 +746,14 @@ MemoryDependenceAnalysis::getNonLocalCallDependency(CallSite QueryCS) { if (Instruction *Inst = Dep.getInst()) ReverseNonLocalDeps[Inst].insert(QueryCS.getInstruction()); } else { - + // If the block *is* completely transparent to the load, we need to check // the predecessors of this block. Add them to our worklist. 
for (BasicBlock **PI = PredCache->GetPreds(DirtyBB); *PI; ++PI) DirtyBlocks.push_back(*PI); } } - + return Cache; } @@ -753,9 +771,9 @@ getNonLocalPointerDependency(const AliasAnalysis::Location &Loc, bool isLoad, assert(Loc.Ptr->getType()->isPointerTy() && "Can't get pointer deps of a non-pointer!"); Result.clear(); - + PHITransAddr Address(const_cast(Loc.Ptr), TD); - + // This is the set of blocks we've inspected, and the pointer we consider in // each block. Because of critical edges, we currently bail out if querying // a block with multiple different pointers. This can happen during PHI @@ -778,7 +796,7 @@ MemDepResult MemoryDependenceAnalysis:: GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, bool isLoad, BasicBlock *BB, NonLocalDepInfo *Cache, unsigned NumSortedEntries) { - + // Do a binary search to see if we already have an entry for this block in // the cache set. If so, find it. NonLocalDepInfo::iterator Entry = @@ -786,18 +804,18 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, NonLocalDepEntry(BB)); if (Entry != Cache->begin() && (Entry-1)->getBB() == BB) --Entry; - + NonLocalDepEntry *ExistingResult = 0; if (Entry != Cache->begin()+NumSortedEntries && Entry->getBB() == BB) ExistingResult = &*Entry; - + // If we have a cached entry, and it is non-dirty, use it as the value for // this dependency. if (ExistingResult && !ExistingResult->getResult().isDirty()) { ++NumCacheNonLocalPtr; return ExistingResult->getResult(); - } - + } + // Otherwise, we have to scan for the value. If we have a dirty cache // entry, start scanning from its position, otherwise we scan from the end // of the block. @@ -807,30 +825,30 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, "Instruction invalidated?"); ++NumCacheDirtyNonLocalPtr; ScanPos = ExistingResult->getResult().getInst(); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. ValueIsLoadPair CacheKey(Loc.Ptr, isLoad); RemoveFromReverseMap(ReverseNonLocalPtrDeps, ScanPos, CacheKey); } else { ++NumUncacheNonLocalPtr; } - + // Scan the block for the dependency. MemDepResult Dep = getPointerDependencyFrom(Loc, isLoad, ScanPos, BB); - + // If we had a dirty entry for the block, update it. Otherwise, just add // a new entry. if (ExistingResult) ExistingResult->setResult(Dep); else Cache->push_back(NonLocalDepEntry(BB, Dep)); - + // If the block has a dependency (i.e. it isn't completely transparent to // the value), remember the reverse association because we just added it // to Cache! if (!Dep.isDef() && !Dep.isClobber()) return Dep; - + // Keep the ReverseNonLocalPtrDeps map up to date so we can efficiently // update MemDep when we remove instructions. Instruction *Inst = Dep.getInst(); @@ -843,7 +861,7 @@ GetNonLocalInfoForBlock(const AliasAnalysis::Location &Loc, /// SortNonLocalDepInfoCache - Sort the a NonLocalDepInfo cache, given a certain /// number of elements in the array that are already properly ordered. This is /// optimized for the case when only a few entries are added. -static void +static void SortNonLocalDepInfoCache(MemoryDependenceAnalysis::NonLocalDepInfo &Cache, unsigned NumSortedEntries) { switch (Cache.size() - NumSortedEntries) { @@ -895,7 +913,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, SmallVectorImpl &Result, DenseMap &Visited, bool SkipFirstBlock) { - + // Look up the cached info for Pointer. 
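The non-local dependence cache is a vector whose first NumSortedEntries elements are kept sorted, so lookups can binary-search that prefix while a few freshly appended entries sit unsorted at the end; SortNonLocalDepInfoCache then restores full order cheaply. A simplified version of that re-sort over plain ints (the real code also special-cases a small number of new entries):

    #include <algorithm>
    #include <vector>

    // Restore sortedness after appending new entries past the sorted prefix.
    static void resortCache(std::vector<int> &Cache, size_t NumSorted) {
      switch (Cache.size() - NumSorted) {
      case 0:
        break;                                  // nothing was added
      case 1: {                                 // rotate the single new entry in
        int Val = Cache.back();
        Cache.pop_back();
        Cache.insert(std::upper_bound(Cache.begin(), Cache.end(), Val), Val);
        break;
      }
      default:
        std::sort(Cache.begin(), Cache.end());  // too many changes: full sort
        break;
      }
    }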
ValueIsLoadPair CacheKey(Pointer.getAddr(), isLoad); @@ -909,7 +927,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Get the NLPI for CacheKey, inserting one into the map if it doesn't // already have one. - std::pair Pair = + std::pair Pair = NonLocalPointerDeps.insert(std::make_pair(CacheKey, InitialNLPI)); NonLocalPointerInfo *CacheInfo = &Pair.first->second; @@ -971,14 +989,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DenseMap::iterator VI = Visited.find(I->getBB()); if (VI == Visited.end() || VI->second == Pointer.getAddr()) continue; - + // We have a pointer mismatch in a block. Just return clobber, saying // that something was clobbered in this result. We could also do a // non-fully cached query, but there is little point in doing this. return true; } } - + Value *Addr = Pointer.getAddr(); for (NonLocalDepInfo::iterator I = Cache->begin(), E = Cache->end(); I != E; ++I) { @@ -989,7 +1007,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, ++NumCacheCompleteNonLocalPtr; return false; } - + // Otherwise, either this is a new block, a block with an invalid cache // pointer or one that we're about to invalidate by putting more info into it // than its valid cache info. If empty, the result will be valid cache info, @@ -998,10 +1016,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, CacheInfo->Pair = BBSkipFirstBlockPair(StartBB, SkipFirstBlock); else CacheInfo->Pair = BBSkipFirstBlockPair(); - + SmallVector Worklist; Worklist.push_back(StartBB); - + // PredList used inside loop. SmallVector, 16> PredList; @@ -1012,10 +1030,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // revisit blocks after we insert info for them. unsigned NumSortedEntries = Cache->size(); DEBUG(AssertSorted(*Cache)); - + while (!Worklist.empty()) { BasicBlock *BB = Worklist.pop_back_val(); - + // Skip the first block if we have it. if (!SkipFirstBlock) { // Analyze the dependency of *Pointer in FromBB. See if we already have @@ -1027,14 +1045,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, DEBUG(AssertSorted(*Cache, NumSortedEntries)); MemDepResult Dep = GetNonLocalInfoForBlock(Loc, isLoad, BB, Cache, NumSortedEntries); - + // If we got a Def or Clobber, add this to the list of results. if (!Dep.isNonLocal() && DT->isReachableFromEntry(BB)) { Result.push_back(NonLocalDepResult(BB, Dep, Pointer.getAddr())); continue; } } - + // If 'Pointer' is an instruction defined in this block, then we need to do // phi translation to change it into a value live in the predecessor block. // If not, we just add the predecessors to the worklist and scan them with @@ -1051,7 +1069,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, NewBlocks.push_back(*PI); continue; } - + // If we have seen this block before, but it was with a different // pointer then we have a phi translation failure and we have to treat // this as a clobber. @@ -1066,12 +1084,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, Worklist.append(NewBlocks.begin(), NewBlocks.end()); continue; } - + // We do need to do phi translation, if we know ahead of time we can't phi // translate this value, don't even try. if (!Pointer.IsPotentiallyPHITranslatable()) goto PredTranslationFailure; - + // We may have added values to the cache list before this PHI translation. // If so, we haven't done anything to ensure that the cache remains sorted. 
// Sort it now (if needed) so that recursive invocations of @@ -1094,7 +1112,7 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, PredPointer.PHITranslateValue(BB, Pred, 0); Value *PredPtrVal = PredPointer.getAddr(); - + // Check to see if we have already visited this pred block with another // pointer. If so, we can't do this lookup. This failure can occur // with PHI translation when a critical edge exists and the PHI node in @@ -1111,14 +1129,14 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // the analysis and can ignore it. if (InsertRes.first->second == PredPtrVal) continue; - + // Otherwise, the block was previously analyzed with a different // pointer. We can't represent the result of this case, so we just // treat this as a phi translation failure. // Make sure to clean up the Visited map before continuing on to // PredTranslationFailure. - for (unsigned i = 0; i < PredList.size(); i++) + for (unsigned i = 0, n = PredList.size(); i < n; ++i) Visited.erase(PredList[i].first); goto PredTranslationFailure; @@ -1127,10 +1145,10 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // Actually process results here; this need to be a separate loop to avoid // calling getNonLocalPointerDepFromBB for blocks we don't want to return - // any results for. (getNonLocalPointerDepFromBB will modify our + // any results for. (getNonLocalPointerDepFromBB will modify our // datastructures in ways the code after the PredTranslationFailure label // doesn't expect.) - for (unsigned i = 0; i < PredList.size(); i++) { + for (unsigned i = 0, n = PredList.size(); i < n; ++i) { BasicBlock *Pred = PredList[i].first; PHITransAddr &PredPointer = PredList[i].second; Value *PredPtrVal = PredPointer.getAddr(); @@ -1170,12 +1188,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, continue; } } - + // Refresh the CacheInfo/Cache pointer so that it isn't invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); - + // Since we did phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all @@ -1188,20 +1206,20 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // The following code is "failure"; we can't produce a sane translation // for the given block. It assumes that we haven't modified any of // our datastructures while processing the current block. - + if (Cache == 0) { // Refresh the CacheInfo/Cache pointer if it got invalidated. CacheInfo = &NonLocalPointerDeps[CacheKey]; Cache = &CacheInfo->NonLocalDeps; NumSortedEntries = Cache->size(); } - + // Since we failed phi translation, the "Cache" set won't contain all of the // results for the query. This is ok (we can still use it to accelerate // specific block queries) but we can't do the fastpath "return all // results from the set". Clear out the indicator for this. CacheInfo->Pair = BBSkipFirstBlockPair(); - + // If *nothing* works, mark the pointer as unknown. // // If this is the magic first block, return this as a clobber of the whole @@ -1209,12 +1227,12 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, // we have to bail out. 
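During phi translation each visited block is allowed only one translated pointer; revisiting a block with a different pointer is a failure that must be treated conservatively rather than merged, which is why the Visited entries added for the current attempt are erased before jumping to PredTranslationFailure. The check itself reduces to an insert-or-compare (Block and Val are placeholder types):

    #include <map>
    #include <utility>

    struct Block;
    struct Val;

    // Returns true if BB has not been visited, or was visited with the same
    // translated pointer; false signals a phi-translation conflict.
    static bool recordVisit(std::map<const Block *, const Val *> &Visited,
                            const Block *BB, const Val *TranslatedPtr) {
      auto Ins = Visited.insert(std::make_pair(BB, TranslatedPtr));
      return Ins.second || Ins.first->second == TranslatedPtr;
    }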
if (SkipFirstBlock) return true; - + for (NonLocalDepInfo::reverse_iterator I = Cache->rbegin(); ; ++I) { assert(I != Cache->rend() && "Didn't find current block??"); if (I->getBB() != BB) continue; - + assert(I->getResult().isNonLocal() && "Should only be here with transparent block"); I->setResult(MemDepResult::getUnknown()); @@ -1234,23 +1252,23 @@ getNonLocalPointerDepFromBB(const PHITransAddr &Pointer, /// CachedNonLocalPointerInfo, remove it. void MemoryDependenceAnalysis:: RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair P) { - CachedNonLocalPointerInfo::iterator It = + CachedNonLocalPointerInfo::iterator It = NonLocalPointerDeps.find(P); if (It == NonLocalPointerDeps.end()) return; - + // Remove all of the entries in the BB->val map. This involves removing // instructions from the reverse map. NonLocalDepInfo &PInfo = It->second.NonLocalDeps; - + for (unsigned i = 0, e = PInfo.size(); i != e; ++i) { Instruction *Target = PInfo[i].getResult().getInst(); if (Target == 0) continue; // Ignore non-local dep results. assert(Target->getParent() == PInfo[i].getBB()); - + // Eliminating the dirty entry from 'Cache', so update the reverse info. RemoveFromReverseMap(ReverseNonLocalPtrDeps, Target, P); } - + // Remove P from NonLocalPointerDeps (which deletes NonLocalDepInfo). NonLocalPointerDeps.erase(It); } @@ -1305,20 +1323,20 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { // Remove this local dependency info. LocalDeps.erase(LocalDepEntry); } - + // If we have any cached pointer dependencies on this instruction, remove // them. If the instruction has non-pointer type, then it can't be a pointer // base. - + // Remove it from both the load info and the store info. The instruction // can't be in either of these maps if it is non-pointer. if (RemInst->getType()->isPointerTy()) { RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, false)); RemoveCachedNonLocalPointerDependencies(ValueIsLoadPair(RemInst, true)); } - + // Loop over all of the things that depend on the instruction we're removing. - // + // SmallVector, 8> ReverseDepsToAdd; // If we find RemInst as a clobber or Def in any of the maps for other values, @@ -1330,29 +1348,29 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { MemDepResult NewDirtyVal; if (!RemInst->isTerminator()) NewDirtyVal = MemDepResult::getDirty(++BasicBlock::iterator(RemInst)); - + ReverseDepMapType::iterator ReverseDepIt = ReverseLocalDeps.find(RemInst); if (ReverseDepIt != ReverseLocalDeps.end()) { SmallPtrSet &ReverseDeps = ReverseDepIt->second; // RemInst can't be the terminator if it has local stuff depending on it. assert(!ReverseDeps.empty() && !isa(RemInst) && "Nothing can locally depend on a terminator"); - + for (SmallPtrSet::iterator I = ReverseDeps.begin(), E = ReverseDeps.end(); I != E; ++I) { Instruction *InstDependingOnRemInst = *I; assert(InstDependingOnRemInst != RemInst && "Already removed our local dep info"); - + LocalDeps[InstDependingOnRemInst] = NewDirtyVal; - + // Make sure to remember that new things depend on NewDepInst. 
assert(NewDirtyVal.getInst() && "There is no way something else can have " "a local dep on this if it is a terminator!"); - ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), + ReverseDepsToAdd.push_back(std::make_pair(NewDirtyVal.getInst(), InstDependingOnRemInst)); } - + ReverseLocalDeps.erase(ReverseDepIt); // Add new reverse deps after scanning the set, to avoid invalidating the @@ -1363,25 +1381,25 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + ReverseDepIt = ReverseNonLocalDeps.find(RemInst); if (ReverseDepIt != ReverseNonLocalDeps.end()) { SmallPtrSet &Set = ReverseDepIt->second; for (SmallPtrSet::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { assert(*I != RemInst && "Already removed NonLocalDep info for RemInst"); - + PerInstNLInfo &INLD = NonLocalDeps[*I]; // The information is now dirty! INLD.second = true; - - for (NonLocalDepInfo::iterator DI = INLD.first.begin(), + + for (NonLocalDepInfo::iterator DI = INLD.first.begin(), DE = INLD.first.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NextI = NewDirtyVal.getInst()) ReverseDepsToAdd.push_back(std::make_pair(NextI, *I)); } @@ -1396,7 +1414,7 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { ReverseDepsToAdd.pop_back(); } } - + // If the instruction is in ReverseNonLocalPtrDeps then it appears as a // value in the NonLocalPointerDeps info. ReverseNonLocalPtrDepTy::iterator ReversePtrDepIt = @@ -1404,45 +1422,45 @@ void MemoryDependenceAnalysis::removeInstruction(Instruction *RemInst) { if (ReversePtrDepIt != ReverseNonLocalPtrDeps.end()) { SmallPtrSet &Set = ReversePtrDepIt->second; SmallVector,8> ReversePtrDepsToAdd; - + for (SmallPtrSet::iterator I = Set.begin(), E = Set.end(); I != E; ++I) { ValueIsLoadPair P = *I; assert(P.getPointer() != RemInst && "Already removed NonLocalPointerDeps info for RemInst"); - + NonLocalDepInfo &NLPDI = NonLocalPointerDeps[P].NonLocalDeps; - + // The cache is not valid for any specific block anymore. NonLocalPointerDeps[P].Pair = BBSkipFirstBlockPair(); - + // Update any entries for RemInst to use the instruction after it. for (NonLocalDepInfo::iterator DI = NLPDI.begin(), DE = NLPDI.end(); DI != DE; ++DI) { if (DI->getResult().getInst() != RemInst) continue; - + // Convert to a dirty entry for the subsequent instruction. DI->setResult(NewDirtyVal); - + if (Instruction *NewDirtyInst = NewDirtyVal.getInst()) ReversePtrDepsToAdd.push_back(std::make_pair(NewDirtyInst, P)); } - + // Re-sort the NonLocalDepInfo. Changing the dirty entry to its // subsequent value may invalidate the sortedness. 
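removeInstruction retargets every dependent entry at the instruction following the removed one, but it cannot insert into the reverse-dependence maps while iterating one of their sets, so the new (target, depender) pairs are staged in ReverseDepsToAdd and applied afterwards. The same defer-then-apply pattern in miniature (Inst is a stand-in for Instruction*):

    #include <map>
    #include <set>
    #include <utility>
    #include <vector>

    using Inst = int; // stand-in for Instruction*

    // Stage the inserts while scanning, apply them once iteration is finished so
    // the container being walked is never modified under its own iterators.
    static void retargetDependers(std::map<Inst, std::set<Inst>> &ReverseDeps,
                                  const std::set<Inst> &Dependers,
                                  Inst NewTarget) {
      std::vector<std::pair<Inst, Inst>> ToAdd;
      for (Inst I : Dependers)
        ToAdd.push_back(std::make_pair(NewTarget, I));
      for (const auto &P : ToAdd)
        ReverseDeps[P.first].insert(P.second);
    }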
std::sort(NLPDI.begin(), NLPDI.end()); } - + ReverseNonLocalPtrDeps.erase(ReversePtrDepIt); - + while (!ReversePtrDepsToAdd.empty()) { ReverseNonLocalPtrDeps[ReversePtrDepsToAdd.back().first] .insert(ReversePtrDepsToAdd.back().second); ReversePtrDepsToAdd.pop_back(); } } - - + + assert(!NonLocalDeps.count(RemInst) && "RemInst got reinserted?"); AA->deleteValue(RemInst); DEBUG(verifyRemoved(RemInst)); @@ -1456,7 +1474,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { assert(I->second.getInst() != D && "Inst occurs in data structures"); } - + for (CachedNonLocalPointerInfo::const_iterator I =NonLocalPointerDeps.begin(), E = NonLocalPointerDeps.end(); I != E; ++I) { assert(I->first.getPointer() != D && "Inst occurs in NLPD map key"); @@ -1465,7 +1483,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { II != E; ++II) assert(II->getResult().getInst() != D && "Inst occurs as NLPD value"); } - + for (NonLocalDepMapType::const_iterator I = NonLocalDeps.begin(), E = NonLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1474,7 +1492,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = INLD.first.end(); II != EE; ++II) assert(II->getResult().getInst() != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseLocalDeps.begin(), E = ReverseLocalDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in data structures"); @@ -1482,7 +1500,7 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseDepMapType::const_iterator I = ReverseNonLocalDeps.begin(), E = ReverseNonLocalDeps.end(); I != E; ++I) { @@ -1491,17 +1509,17 @@ void MemoryDependenceAnalysis::verifyRemoved(Instruction *D) const { EE = I->second.end(); II != EE; ++II) assert(*II != D && "Inst occurs in data structures"); } - + for (ReverseNonLocalPtrDepTy::const_iterator I = ReverseNonLocalPtrDeps.begin(), E = ReverseNonLocalPtrDeps.end(); I != E; ++I) { assert(I->first != D && "Inst occurs in rev NLPD map"); - + for (SmallPtrSet::const_iterator II = I->second.begin(), E = I->second.end(); II != E; ++II) assert(*II != ValueIsLoadPair(D, false) && *II != ValueIsLoadPair(D, true) && "Inst occurs in ReverseNonLocalPtrDeps map"); } - + } diff --git a/lib/Analysis/ModuleDebugInfoPrinter.cpp b/lib/Analysis/ModuleDebugInfoPrinter.cpp index f8c751481976..03415375263a 100644 --- a/lib/Analysis/ModuleDebugInfoPrinter.cpp +++ b/lib/Analysis/ModuleDebugInfoPrinter.cpp @@ -16,13 +16,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Assembly/Writer.h" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Pass.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; namespace { diff --git a/lib/Analysis/NoAliasAnalysis.cpp b/lib/Analysis/NoAliasAnalysis.cpp index 2eb4137c533a..907e9621baed 100644 --- a/lib/Analysis/NoAliasAnalysis.cpp +++ b/lib/Analysis/NoAliasAnalysis.cpp @@ -12,10 +12,10 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/DataLayout.h" #include 
"llvm/Pass.h" -#include "llvm/DataLayout.h" using namespace llvm; namespace { diff --git a/lib/Analysis/PHITransAddr.cpp b/lib/Analysis/PHITransAddr.cpp index c35737e4724c..e6af0663feaa 100644 --- a/lib/Analysis/PHITransAddr.cpp +++ b/lib/Analysis/PHITransAddr.cpp @@ -12,11 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/PHITransAddr.h" -#include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" +#include "llvm/Analysis/ValueTracking.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/Analysis/PathNumbering.cpp b/lib/Analysis/PathNumbering.cpp index d4ad72648166..30d213b77576 100644 --- a/lib/Analysis/PathNumbering.cpp +++ b/lib/Analysis/PathNumbering.cpp @@ -25,24 +25,23 @@ #define DEBUG_TYPE "ball-larus-numbering" #include "llvm/Analysis/PathNumbering.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/InstrTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/TypeBuilder.h" #include "llvm/Pass.h" -#include "llvm/TypeBuilder.h" #include "llvm/Support/CFG.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include +#include #include #include #include -#include using namespace llvm; diff --git a/lib/Analysis/PathProfileInfo.cpp b/lib/Analysis/PathProfileInfo.cpp index b361d3f4fa94..bc53221d3176 100644 --- a/lib/Analysis/PathProfileInfo.cpp +++ b/lib/Analysis/PathProfileInfo.cpp @@ -13,15 +13,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "path-profile-info" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/PathProfileInfo.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/Analysis/PathProfileInfo.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include using namespace llvm; diff --git a/lib/Analysis/PathProfileVerifier.cpp b/lib/Analysis/PathProfileVerifier.cpp index 0fcdfe75aefd..48d7d05d788f 100644 --- a/lib/Analysis/PathProfileVerifier.cpp +++ b/lib/Analysis/PathProfileVerifier.cpp @@ -13,15 +13,14 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "path-profile-verifier" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfoTypes.h" #include "llvm/Analysis/PathProfileInfo.h" -#include "llvm/Support/Debug.h" +#include "llvm/Analysis/ProfileInfoTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - #include using namespace llvm; @@ -85,7 +84,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { for (Module::iterator F = M.begin(), E = M.end(); F != E; ++F) { if (F->isDeclaration()) continue; - arrayMap[0][F->begin()][0] = i++; + 
arrayMap[(BasicBlock*)0][F->begin()][0] = i++; for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { TerminatorInst *TI = BB->getTerminator(); @@ -126,7 +125,7 @@ bool PathProfileVerifier::runOnModule (Module &M) { << currentPath->getCount() << "\n"); // setup the entry edge (normally path profiling doesn't care about this) if (currentPath->getFirstBlockInPath() == &F->getEntryBlock()) - edgeArray[arrayMap[0][currentPath->getFirstBlockInPath()][0]] + edgeArray[arrayMap[(BasicBlock*)0][currentPath->getFirstBlockInPath()][0]] += currentPath->getCount(); for( ProfilePathEdgeIterator nextEdge = pev->begin(), diff --git a/lib/Analysis/PostDominators.cpp b/lib/Analysis/PostDominators.cpp index 6ed27297923f..96804a01edc6 100644 --- a/lib/Analysis/PostDominators.cpp +++ b/lib/Analysis/PostDominators.cpp @@ -14,13 +14,13 @@ #define DEBUG_TYPE "postdomtree" #include "llvm/Analysis/PostDominators.h" -#include "llvm/Instructions.h" -#include "llvm/Support/CFG.h" -#include "llvm/Support/Debug.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/SetOperations.h" -#include "llvm/Assembly/Writer.h" #include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/Analysis/ProfileDataLoader.cpp b/lib/Analysis/ProfileDataLoader.cpp index a4f634af531e..d7f444b4b6d7 100644 --- a/lib/Analysis/ProfileDataLoader.cpp +++ b/lib/Analysis/ProfileDataLoader.cpp @@ -12,12 +12,12 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/ProfileDataLoader.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Module.h" -#include "llvm/InstrTypes.h" -#include "llvm/Analysis/ProfileDataLoader.h" #include "llvm/Analysis/ProfileDataTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include diff --git a/lib/Analysis/ProfileDataLoaderPass.cpp b/lib/Analysis/ProfileDataLoaderPass.cpp index c43cff05a4da..2ee0093a8f57 100644 --- a/lib/Analysis/ProfileDataLoaderPass.cpp +++ b/lib/Analysis/ProfileDataLoaderPass.cpp @@ -15,22 +15,22 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-metadata-loader" -#include "llvm/ADT/ArrayRef.h" -#include "llvm/BasicBlock.h" -#include "llvm/InstrTypes.h" -#include "llvm/Module.h" -#include "llvm/LLVMContext.h" -#include "llvm/MDBuilder.h" -#include "llvm/Metadata.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileDataLoader.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; STATISTIC(NumEdgesRead, "The # of edges read."); diff --git a/lib/Analysis/ProfileEstimatorPass.cpp b/lib/Analysis/ProfileEstimatorPass.cpp index 
12b59e0a6fd5..b284b995ac78 100644 --- a/lib/Analysis/ProfileEstimatorPass.cpp +++ b/lib/Analysis/ProfileEstimatorPass.cpp @@ -12,14 +12,14 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-estimator" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ProfileInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; static cl::opt diff --git a/lib/Analysis/ProfileInfo.cpp b/lib/Analysis/ProfileInfo.cpp index b5b7ac1e5011..9626a48b9d0d 100644 --- a/lib/Analysis/ProfileInfo.cpp +++ b/lib/Analysis/ProfileInfo.cpp @@ -12,16 +12,16 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-info" -#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/Analysis/Passes.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/Pass.h" #include "llvm/Support/CFG.h" -#include "llvm/ADT/SmallSet.h" -#include -#include #include +#include +#include using namespace llvm; namespace llvm { @@ -249,7 +249,7 @@ const BasicBlock *ProfileInfoT:: succ_const_iterator Succ = succ_begin(BB), End = succ_end(BB); if (Succ == End) { - P[0] = BB; + P[(const BasicBlock*)0] = BB; if (Mode & GetPathToExit) { hasFoundPath = true; BB = 0; @@ -752,10 +752,10 @@ void ProfileInfoT::repair(const Function *F) { Succ != End; ++Succ) { Path P; GetPath(*Succ, 0, P, GetPathToExit); - if (Dest && Dest != P[0]) { + if (Dest && Dest != P[(const BasicBlock*)0]) { AllEdgesHaveSameReturn = false; } - Dest = P[0]; + Dest = P[(const BasicBlock*)0]; } if (AllEdgesHaveSameReturn) { if(EstimateMissingEdges(BB)) { @@ -927,7 +927,7 @@ void ProfileInfoT::repair(const Function *F) { Path P; const BasicBlock *Dest = GetPath(BB, 0, P, GetPathToExit | GetPathWithNewEdges); - Dest = P[0]; + Dest = P[(const BasicBlock*)0]; if (!Dest) continue; if (getEdgeWeight(getEdge(Dest,0)) == MissingValue) { diff --git a/lib/Analysis/ProfileInfoLoader.cpp b/lib/Analysis/ProfileInfoLoader.cpp index 5c7c97cad1e5..f1f3e940c932 100644 --- a/lib/Analysis/ProfileInfoLoader.cpp +++ b/lib/Analysis/ProfileInfoLoader.cpp @@ -14,8 +14,8 @@ #include "llvm/Analysis/ProfileInfoLoader.h" #include "llvm/Analysis/ProfileInfoTypes.h" -#include "llvm/Module.h" -#include "llvm/InstrTypes.h" +#include "llvm/IR/InstrTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/raw_ostream.h" #include #include diff --git a/lib/Analysis/ProfileInfoLoaderPass.cpp b/lib/Analysis/ProfileInfoLoaderPass.cpp index 5ecf052a1a24..346f8d6d6258 100644 --- a/lib/Analysis/ProfileInfoLoaderPass.cpp +++ b/lib/Analysis/ProfileInfoLoaderPass.cpp @@ -12,20 +12,20 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-loader" -#include "llvm/BasicBlock.h" -#include "llvm/InstrTypes.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" #include "llvm/Analysis/Passes.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/Analysis/ProfileInfoLoader.h" -#include "llvm/Support/CommandLine.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InstrTypes.h" +#include 
"llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Format.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; diff --git a/lib/Analysis/ProfileVerifierPass.cpp b/lib/Analysis/ProfileVerifierPass.cpp index 0cb158865afe..c8896de89301 100644 --- a/lib/Analysis/ProfileVerifierPass.cpp +++ b/lib/Analysis/ProfileVerifierPass.cpp @@ -12,17 +12,18 @@ // //===----------------------------------------------------------------------===// #define DEBUG_TYPE "profile-verifier" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/ProfileInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/CallSite.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" #include "llvm/Support/InstIterator.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/Debug.h" #include using namespace llvm; diff --git a/lib/Analysis/PtrUseVisitor.cpp b/lib/Analysis/PtrUseVisitor.cpp new file mode 100644 index 000000000000..0a342b2167e4 --- /dev/null +++ b/lib/Analysis/PtrUseVisitor.cpp @@ -0,0 +1,36 @@ +//===- PtrUseVisitor.cpp - InstVisitors over a pointers uses --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// Implementation of the pointer use visitors. 
+/// +//===----------------------------------------------------------------------===// + +#include "llvm/Analysis/PtrUseVisitor.h" + +using namespace llvm; + +void detail::PtrUseVisitorBase::enqueueUsers(Instruction &I) { + for (Value::use_iterator UI = I.use_begin(), UE = I.use_end(); + UI != UE; ++UI) { + if (VisitedUses.insert(&UI.getUse())) { + UseToVisit NewU = { + UseToVisit::UseAndIsOffsetKnownPair(&UI.getUse(), IsOffsetKnown), + Offset + }; + Worklist.push_back(llvm_move(NewU)); + } + } +} + +bool detail::PtrUseVisitorBase::adjustOffsetForGEP(GetElementPtrInst &GEPI) { + if (!IsOffsetKnown) + return false; + + return GEPI.accumulateConstantOffset(DL, Offset); +} diff --git a/lib/Analysis/RegionInfo.cpp b/lib/Analysis/RegionInfo.cpp index 30f0d2f10d86..fad5074086ce 100644 --- a/lib/Analysis/RegionInfo.cpp +++ b/lib/Analysis/RegionInfo.cpp @@ -10,14 +10,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/RegionInfo.h" -#include "llvm/Analysis/RegionIterator.h" - #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/Statistic.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/RegionIterator.h" #include "llvm/Assembly/Writer.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" #define DEBUG_TYPE "region" #include "llvm/Support/Debug.h" diff --git a/lib/Analysis/RegionPrinter.cpp b/lib/Analysis/RegionPrinter.cpp index 8b23cc704242..c5f1b925921b 100644 --- a/lib/Analysis/RegionPrinter.cpp +++ b/lib/Analysis/RegionPrinter.cpp @@ -9,16 +9,16 @@ // Print out the region tree of a function using dotty/graphviz. //===----------------------------------------------------------------------===// +#include "llvm/Analysis/Passes.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DOTGraphTraitsPass.h" #include "llvm/Analysis/RegionInfo.h" #include "llvm/Analysis/RegionIterator.h" #include "llvm/Analysis/RegionPrinter.h" -#include "llvm/Analysis/Passes.h" -#include "llvm/Analysis/DOTGraphTraitsPass.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/Support/Debug.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index e3189ecc8994..6ea915fdb0b7 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -59,22 +59,25 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "scalar-evolution" -#include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" -#include "llvm/Instructions.h" -#include "llvm/LLVMContext.h" -#include "llvm/Operator.h" +#include "llvm/Analysis/ScalarEvolution.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" +#include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" -#include 
"llvm/DataLayout.h" -#include "llvm/Target/TargetLibraryInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/Debug.h" @@ -83,9 +86,7 @@ #include "llvm/Support/InstIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Target/TargetLibraryInfo.h" #include using namespace llvm; @@ -4229,6 +4230,25 @@ ScalarEvolution::BackedgeTakenInfo::getMax(ScalarEvolution *SE) const { return Max ? Max : SE->getCouldNotCompute(); } +bool ScalarEvolution::BackedgeTakenInfo::hasOperand(const SCEV *S, + ScalarEvolution *SE) const { + if (Max && Max != SE->getCouldNotCompute() && SE->hasOperand(Max, S)) + return true; + + if (!ExitNotTaken.ExitingBlock) + return false; + + for (const ExitNotTakenInfo *ENT = &ExitNotTaken; + ENT != 0; ENT = ENT->getNextExit()) { + + if (ENT->ExactNotTaken != SE->getCouldNotCompute() + && SE->hasOperand(ENT->ExactNotTaken, S)) { + return true; + } + } + return false; +} + /// Allocate memory for BackedgeTakenInfo and copy the not-taken count of each /// computable exit into a persistent ExitNotTakenInfo array. ScalarEvolution::BackedgeTakenInfo::BackedgeTakenInfo( @@ -6120,8 +6140,8 @@ bool ScalarEvolution::isImpliedCond(ICmpInst::Predicate Pred, getTypeSizeInBits(ICI->getOperand(0)->getType())) return false; - // Now that we found a conditional branch that dominates the loop, check to - // see if it is the comparison we are looking for. + // Now that we found a conditional branch that dominates the loop or controls + // the loop latch. Check to see if it is the comparison we are looking for. 
ICmpInst::Predicate FoundPred; if (Inverse) FoundPred = ICI->getInversePredicate(); @@ -6939,6 +6959,17 @@ void ScalarEvolution::forgetMemoizedResults(const SCEV *S) { BlockDispositions.erase(S); UnsignedRanges.erase(S); SignedRanges.erase(S); + + for (DenseMap::iterator I = + BackedgeTakenCounts.begin(), E = BackedgeTakenCounts.end(); I != E; ) { + BackedgeTakenInfo &BEInfo = I->second; + if (BEInfo.hasOperand(S, this)) { + BEInfo.clear(); + BackedgeTakenCounts.erase(I++); + } + else + ++I; + } } typedef DenseMap VerifyMap; diff --git a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp index e9edb3e083de..79c5f0deb03b 100644 --- a/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp +++ b/lib/Analysis/ScalarEvolutionAliasAnalysis.cpp @@ -19,9 +19,9 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Analysis/Passes.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ScalarEvolutionExpressions.h" -#include "llvm/Analysis/Passes.h" #include "llvm/Pass.h" using namespace llvm; diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 111bfb4a6a76..fcd7ce272a22 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -14,13 +14,13 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/ScalarEvolutionExpander.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/LoopInfo.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/STLExtras.h" using namespace llvm; @@ -1523,9 +1523,8 @@ Value *SCEVExpander::expand(const SCEV *S) { } // Check to see if we already expanded this here. - std::map, - AssertingVH >::iterator I = - InsertedExpressions.find(std::make_pair(S, InsertPt)); + std::map, TrackingVH >::iterator + I = InsertedExpressions.find(std::make_pair(S, InsertPt)); if (I != InsertedExpressions.end()) return I->second; @@ -1600,14 +1599,14 @@ static bool width_descending(Value *lhs, Value *rhs) { /// the same context that SCEVExpander is used. unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, SmallVectorImpl &DeadInsts, - const TargetLowering *TLI) { + const TargetTransformInfo *TTI) { // Find integer phis in order of increasing width. SmallVector Phis; for (BasicBlock::iterator I = L->getHeader()->begin(); PHINode *Phi = dyn_cast(I); ++I) { Phis.push_back(Phi); } - if (TLI) + if (TTI) std::sort(Phis.begin(), Phis.end(), width_descending); unsigned NumElim = 0; @@ -1635,8 +1634,8 @@ unsigned SCEVExpander::replaceCongruentIVs(Loop *L, const DominatorTree *DT, PHINode *&OrigPhiRef = ExprToIVMap[SE.getSCEV(Phi)]; if (!OrigPhiRef) { OrigPhiRef = Phi; - if (Phi->getType()->isIntegerTy() && TLI - && TLI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { + if (Phi->getType()->isIntegerTy() && TTI + && TTI->isTruncateFree(Phi->getType(), Phis.back()->getType())) { // This phi can be freely truncated to the narrowest phi type. Map the // truncated expression to it so it will be reused for narrow types. 
const SCEV *TruncExpr = diff --git a/lib/Analysis/SparsePropagation.cpp b/lib/Analysis/SparsePropagation.cpp index c819666ee444..15b78728a73c 100644 --- a/lib/Analysis/SparsePropagation.cpp +++ b/lib/Analysis/SparsePropagation.cpp @@ -14,9 +14,9 @@ #define DEBUG_TYPE "sparseprop" #include "llvm/Analysis/SparsePropagation.h" -#include "llvm/Constants.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/TargetTransformInfo.cpp b/lib/Analysis/TargetTransformInfo.cpp new file mode 100644 index 000000000000..64f8e96884c7 --- /dev/null +++ b/lib/Analysis/TargetTransformInfo.cpp @@ -0,0 +1,558 @@ +//===- llvm/Analysis/TargetTransformInfo.cpp ------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "tti" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Instructions.h" +#include "llvm/Support/CallSite.h" +#include "llvm/Support/ErrorHandling.h" + +using namespace llvm; + +// Setup the analysis group to manage the TargetTransformInfo passes. +INITIALIZE_ANALYSIS_GROUP(TargetTransformInfo, "Target Information", NoTTI) +char TargetTransformInfo::ID = 0; + +TargetTransformInfo::~TargetTransformInfo() { +} + +void TargetTransformInfo::pushTTIStack(Pass *P) { + TopTTI = this; + PrevTTI = &P->getAnalysis(); + + // Walk up the chain and update the top TTI pointer. + for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) + PTTI->TopTTI = this; +} + +void TargetTransformInfo::popTTIStack() { + TopTTI = 0; + + // Walk up the chain and update the top TTI pointer. 
+ for (TargetTransformInfo *PTTI = PrevTTI; PTTI; PTTI = PTTI->PrevTTI) + PTTI->TopTTI = PrevTTI; + + PrevTTI = 0; +} + +void TargetTransformInfo::getAnalysisUsage(AnalysisUsage &AU) const { + AU.addRequired(); +} + +unsigned TargetTransformInfo::getOperationCost(unsigned Opcode, Type *Ty, + Type *OpTy) const { + return PrevTTI->getOperationCost(Opcode, Ty, OpTy); +} + +unsigned TargetTransformInfo::getGEPCost( + const Value *Ptr, ArrayRef Operands) const { + return PrevTTI->getGEPCost(Ptr, Operands); +} + +unsigned TargetTransformInfo::getCallCost(FunctionType *FTy, + int NumArgs) const { + return PrevTTI->getCallCost(FTy, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost(const Function *F, + int NumArgs) const { + return PrevTTI->getCallCost(F, NumArgs); +} + +unsigned TargetTransformInfo::getCallCost( + const Function *F, ArrayRef Arguments) const { + return PrevTTI->getCallCost(F, Arguments); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef ParamTys) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, ParamTys); +} + +unsigned TargetTransformInfo::getIntrinsicCost( + Intrinsic::ID IID, Type *RetTy, ArrayRef Arguments) const { + return PrevTTI->getIntrinsicCost(IID, RetTy, Arguments); +} + +unsigned TargetTransformInfo::getUserCost(const User *U) const { + return PrevTTI->getUserCost(U); +} + +bool TargetTransformInfo::isLoweredToCall(const Function *F) const { + return PrevTTI->isLoweredToCall(F); +} + +bool TargetTransformInfo::isLegalAddImmediate(int64_t Imm) const { + return PrevTTI->isLegalAddImmediate(Imm); +} + +bool TargetTransformInfo::isLegalICmpImmediate(int64_t Imm) const { + return PrevTTI->isLegalICmpImmediate(Imm); +} + +bool TargetTransformInfo::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, + bool HasBaseReg, + int64_t Scale) const { + return PrevTTI->isLegalAddressingMode(Ty, BaseGV, BaseOffset, HasBaseReg, + Scale); +} + +bool TargetTransformInfo::isTruncateFree(Type *Ty1, Type *Ty2) const { + return PrevTTI->isTruncateFree(Ty1, Ty2); +} + +bool TargetTransformInfo::isTypeLegal(Type *Ty) const { + return PrevTTI->isTypeLegal(Ty); +} + +unsigned TargetTransformInfo::getJumpBufAlignment() const { + return PrevTTI->getJumpBufAlignment(); +} + +unsigned TargetTransformInfo::getJumpBufSize() const { + return PrevTTI->getJumpBufSize(); +} + +bool TargetTransformInfo::shouldBuildLookupTables() const { + return PrevTTI->shouldBuildLookupTables(); +} + +TargetTransformInfo::PopcntSupportKind +TargetTransformInfo::getPopcntSupport(unsigned IntTyWidthInBit) const { + return PrevTTI->getPopcntSupport(IntTyWidthInBit); +} + +unsigned TargetTransformInfo::getIntImmCost(const APInt &Imm, Type *Ty) const { + return PrevTTI->getIntImmCost(Imm, Ty); +} + +unsigned TargetTransformInfo::getNumberOfRegisters(bool Vector) const { + return PrevTTI->getNumberOfRegisters(Vector); +} + +unsigned TargetTransformInfo::getRegisterBitWidth(bool Vector) const { + return PrevTTI->getRegisterBitWidth(Vector); +} + +unsigned TargetTransformInfo::getMaximumUnrollFactor() const { + return PrevTTI->getMaximumUnrollFactor(); +} + +unsigned TargetTransformInfo::getArithmeticInstrCost(unsigned Opcode, + Type *Ty, + OperandValueKind Op1Info, + OperandValueKind Op2Info) const { + return PrevTTI->getArithmeticInstrCost(Opcode, Ty, Op1Info, Op2Info); +} + +unsigned TargetTransformInfo::getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const { + return PrevTTI->getShuffleCost(Kind, Tp, Index, SubTp); +} + 
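The new TargetTransformInfo.cpp above wires every cost query through a chain of implementations: each layer keeps a PrevTTI pointer, pushTTIStack/popTTIStack maintain the chain, and the bottom layer (NoTTI, defined later in this file) answers with conservative defaults. The following is a minimal standalone sketch of that delegation pattern — toy class and method names, not LLVM's API — showing how a query falls through to the lowest layer that chooses to answer.

// Standalone sketch of the TTI delegation chain (toy names, not LLVM's API).
#include <cassert>
#include <iostream>

struct CostLayer {
  CostLayer *Prev = nullptr;                 // next-lower layer in the stack
  virtual ~CostLayer() {}
  // Default behaviour: forward the query down the chain.
  virtual unsigned getOperationCost(unsigned Opcode) const {
    assert(Prev && "bottom layer must answer directly");
    return Prev->getOperationCost(Opcode);
  }
};

// Plays the role of NoTTI: conservative default, never forwards.
struct BottomLayer : CostLayer {
  unsigned getOperationCost(unsigned) const override { return 1; }
};

// Plays the role of a target-specific TTI: overrides what it knows,
// defers everything else to the previous layer.
struct TargetLayer : CostLayer {
  unsigned getOperationCost(unsigned Opcode) const override {
    if (Opcode == 42)                        // hypothetical "free" opcode
      return 0;
    return CostLayer::getOperationCost(Opcode);
  }
};

int main() {
  BottomLayer Bottom;
  TargetLayer Target;
  Target.Prev = &Bottom;                     // "pushTTIStack": Target on top
  std::cout << Target.getOperationCost(42)   // answered by TargetLayer: 0
            << " " << Target.getOperationCost(7) << "\n";  // falls through: 1
}

In the patch itself the chain additionally keeps a TopTTI pointer so that compound queries restart at the top of the stack (for example, NoTTI's getCallCost builds on TopTTI->getIntrinsicCost); the sketch omits that detail.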
+unsigned TargetTransformInfo::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return PrevTTI->getCastInstrCost(Opcode, Dst, Src); +} + +unsigned TargetTransformInfo::getCFInstrCost(unsigned Opcode) const { + return PrevTTI->getCFInstrCost(Opcode); +} + +unsigned TargetTransformInfo::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + return PrevTTI->getCmpSelInstrCost(Opcode, ValTy, CondTy); +} + +unsigned TargetTransformInfo::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + return PrevTTI->getVectorInstrCost(Opcode, Val, Index); +} + +unsigned TargetTransformInfo::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return PrevTTI->getMemoryOpCost(Opcode, Src, Alignment, AddressSpace); + ; +} + +unsigned +TargetTransformInfo::getIntrinsicInstrCost(Intrinsic::ID ID, + Type *RetTy, + ArrayRef Tys) const { + return PrevTTI->getIntrinsicInstrCost(ID, RetTy, Tys); +} + +unsigned TargetTransformInfo::getNumberOfParts(Type *Tp) const { + return PrevTTI->getNumberOfParts(Tp); +} + +unsigned TargetTransformInfo::getAddressComputationCost(Type *Tp) const { + return PrevTTI->getAddressComputationCost(Tp); +} + +namespace { + +struct NoTTI : ImmutablePass, TargetTransformInfo { + const DataLayout *DL; + + NoTTI() : ImmutablePass(ID), DL(0) { + initializeNoTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + // Note that this subclass is special, and must *not* call initializeTTI as + // it does not chain. + TopTTI = this; + PrevTTI = 0; + DL = getAnalysisIfAvailable(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + // Note that this subclass is special, and must *not* call + // TTI::getAnalysisUsage as it breaks the recursion. + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + unsigned getOperationCost(unsigned Opcode, Type *Ty, Type *OpTy) const { + switch (Opcode) { + default: + // By default, just classify everything as 'basic'. + return TCC_Basic; + + case Instruction::GetElementPtr: + llvm_unreachable("Use getGEPCost for GEP operations!"); + + case Instruction::BitCast: + assert(OpTy && "Cast instructions must provide the operand type"); + if (Ty == OpTy || (Ty->isPointerTy() && OpTy->isPointerTy())) + // Identity and pointer-to-pointer casts are free. + return TCC_Free; + + // Otherwise, the default basic cost is used. + return TCC_Basic; + + case Instruction::IntToPtr: + // An inttoptr cast is free so long as the input is a legal integer type + // which doesn't contain values outside the range of a pointer. + if (DL && DL->isLegalInteger(OpTy->getScalarSizeInBits()) && + OpTy->getScalarSizeInBits() <= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + + case Instruction::PtrToInt: + // A ptrtoint cast is free so long as the result is large enough to store + // the pointer, and a legal integer type. + if (DL && DL->isLegalInteger(Ty->getScalarSizeInBits()) && + Ty->getScalarSizeInBits() >= DL->getPointerSizeInBits()) + return TCC_Free; + + // Otherwise it's not a no-op. + return TCC_Basic; + + case Instruction::Trunc: + // trunc to a native type is free (assuming the target has compare and + // shift-right of the same width). 
+ if (DL && DL->isLegalInteger(DL->getTypeSizeInBits(Ty))) + return TCC_Free; + + return TCC_Basic; + } + } + + unsigned getGEPCost(const Value *Ptr, + ArrayRef Operands) const { + // In the basic model, we just assume that all-constant GEPs will be folded + // into their uses via addressing modes. + for (unsigned Idx = 0, Size = Operands.size(); Idx != Size; ++Idx) + if (!isa(Operands[Idx])) + return TCC_Basic; + + return TCC_Free; + } + + unsigned getCallCost(FunctionType *FTy, int NumArgs = -1) const { + assert(FTy && "FunctionType must be provided to this routine."); + + // The target-independent implementation just measures the size of the + // function by approximating that each argument will take on average one + // instruction to prepare. + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = FTy->getNumParams(); + + return TCC_Basic * (NumArgs + 1); + } + + unsigned getCallCost(const Function *F, int NumArgs = -1) const { + assert(F && "A concrete function must be provided to this routine."); + + if (NumArgs < 0) + // Set the argument number to the number of explicit arguments in the + // function. + NumArgs = F->arg_size(); + + if (Intrinsic::ID IID = (Intrinsic::ID)F->getIntrinsicID()) { + FunctionType *FTy = F->getFunctionType(); + SmallVector ParamTys(FTy->param_begin(), FTy->param_end()); + return TopTTI->getIntrinsicCost(IID, FTy->getReturnType(), ParamTys); + } + + if (!TopTTI->isLoweredToCall(F)) + return TCC_Basic; // Give a basic cost if it will be lowered directly. + + return TopTTI->getCallCost(F->getFunctionType(), NumArgs); + } + + unsigned getCallCost(const Function *F, + ArrayRef Arguments) const { + // Simply delegate to generic handling of the call. + // FIXME: We should use instsimplify or something else to catch calls which + // will constant fold with these arguments. + return TopTTI->getCallCost(F, Arguments.size()); + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef ParamTys) const { + switch (IID) { + default: + // Intrinsics rarely (if ever) have normal argument setup constraints. + // Model them as having a basic instruction cost. + // FIXME: This is wrong for libc intrinsics. + return TCC_Basic; + + case Intrinsic::dbg_declare: + case Intrinsic::dbg_value: + case Intrinsic::invariant_start: + case Intrinsic::invariant_end: + case Intrinsic::lifetime_start: + case Intrinsic::lifetime_end: + case Intrinsic::objectsize: + case Intrinsic::ptr_annotation: + case Intrinsic::var_annotation: + // These intrinsics don't actually represent code after lowering. + return TCC_Free; + } + } + + unsigned getIntrinsicCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Arguments) const { + // Delegate to the generic intrinsic handling code. This mostly provides an + // opportunity for targets to (for example) special case the cost of + // certain intrinsics based on constants used as arguments. + SmallVector ParamTys; + ParamTys.reserve(Arguments.size()); + for (unsigned Idx = 0, Size = Arguments.size(); Idx != Size; ++Idx) + ParamTys.push_back(Arguments[Idx]->getType()); + return TopTTI->getIntrinsicCost(IID, RetTy, ParamTys); + } + + unsigned getUserCost(const User *U) const { + if (isa(U)) + return TCC_Free; // Model all PHI nodes as free. + + if (const GEPOperator *GEP = dyn_cast(U)) + // In the basic model we just assume that all-constant GEPs will be + // folded into their uses via addressing modes. + return GEP->hasAllConstantIndices() ? 
TCC_Free : TCC_Basic; + + if (ImmutableCallSite CS = U) { + const Function *F = CS.getCalledFunction(); + if (!F) { + // Just use the called value type. + Type *FTy = CS.getCalledValue()->getType()->getPointerElementType(); + return TopTTI->getCallCost(cast(FTy), CS.arg_size()); + } + + SmallVector Arguments; + for (ImmutableCallSite::arg_iterator AI = CS.arg_begin(), + AE = CS.arg_end(); + AI != AE; ++AI) + Arguments.push_back(*AI); + + return TopTTI->getCallCost(F, Arguments); + } + + if (const CastInst *CI = dyn_cast(U)) { + // Result of a cmp instruction is often extended (to be used by other + // cmp instructions, logical or return instructions). These are usually + // nop on most sane targets. + if (isa(CI->getOperand(0))) + return TCC_Free; + } + + // Otherwise delegate to the fully generic implementations. + return getOperationCost(Operator::getOpcode(U), U->getType(), + U->getNumOperands() == 1 ? + U->getOperand(0)->getType() : 0); + } + + bool isLoweredToCall(const Function *F) const { + // FIXME: These should almost certainly not be handled here, and instead + // handled with the help of TLI or the target itself. This was largely + // ported from existing analysis heuristics here so that such refactorings + // can take place in the future. + + if (F->isIntrinsic()) + return false; + + if (F->hasLocalLinkage() || !F->hasName()) + return true; + + StringRef Name = F->getName(); + + // These will all likely lower to a single selection DAG node. + if (Name == "copysign" || Name == "copysignf" || Name == "copysignl" || + Name == "fabs" || Name == "fabsf" || Name == "fabsl" || Name == "sin" || + Name == "sinf" || Name == "sinl" || Name == "cos" || Name == "cosf" || + Name == "cosl" || Name == "sqrt" || Name == "sqrtf" || Name == "sqrtl") + return false; + + // These are all likely to be optimized into something smaller. + if (Name == "pow" || Name == "powf" || Name == "powl" || Name == "exp2" || + Name == "exp2l" || Name == "exp2f" || Name == "floor" || Name == + "floorf" || Name == "ceil" || Name == "round" || Name == "ffs" || + Name == "ffsl" || Name == "abs" || Name == "labs" || Name == "llabs") + return false; + + return true; + } + + bool isLegalAddImmediate(int64_t Imm) const { + return false; + } + + bool isLegalICmpImmediate(int64_t Imm) const { + return false; + } + + bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, int64_t BaseOffset, + bool HasBaseReg, int64_t Scale) const { + // Guess that reg+reg addressing is allowed. This heuristic is taken from + // the implementation of LSR. 
+ return !BaseGV && BaseOffset == 0 && Scale <= 1; + } + + bool isTruncateFree(Type *Ty1, Type *Ty2) const { + return false; + } + + bool isTypeLegal(Type *Ty) const { + return false; + } + + unsigned getJumpBufAlignment() const { + return 0; + } + + unsigned getJumpBufSize() const { + return 0; + } + + bool shouldBuildLookupTables() const { + return true; + } + + PopcntSupportKind getPopcntSupport(unsigned IntTyWidthInBit) const { + return PSK_Software; + } + + unsigned getIntImmCost(const APInt &Imm, Type *Ty) const { + return 1; + } + + unsigned getNumberOfRegisters(bool Vector) const { + return 8; + } + + unsigned getRegisterBitWidth(bool Vector) const { + return 32; + } + + unsigned getMaximumUnrollFactor() const { + return 1; + } + + unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, OperandValueKind, + OperandValueKind) const { + return 1; + } + + unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index = 0, Type *SubTp = 0) const { + return 1; + } + + unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + return 1; + } + + unsigned getCFInstrCost(unsigned Opcode) const { + return 1; + } + + unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy = 0) const { + return 1; + } + + unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index = -1) const { + return 1; + } + + unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + return 1; + } + + unsigned getIntrinsicInstrCost(Intrinsic::ID ID, + Type *RetTy, + ArrayRef Tys) const { + return 1; + } + + unsigned getNumberOfParts(Type *Tp) const { + return 0; + } + + unsigned getAddressComputationCost(Type *Tp) const { + return 0; + } +}; + +} // end anonymous namespace + +INITIALIZE_AG_PASS(NoTTI, TargetTransformInfo, "notti", + "No target information", true, true, true) +char NoTTI::ID = 0; + +ImmutablePass *llvm::createNoTargetTransformInfoPass() { + return new NoTTI(); +} diff --git a/lib/Analysis/Trace.cpp b/lib/Analysis/Trace.cpp index 22da85762034..4c68322b8282 100644 --- a/lib/Analysis/Trace.cpp +++ b/lib/Analysis/Trace.cpp @@ -16,8 +16,8 @@ //===----------------------------------------------------------------------===// #include "llvm/Analysis/Trace.h" -#include "llvm/Function.h" #include "llvm/Assembly/Writer.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/Analysis/TypeBasedAliasAnalysis.cpp b/lib/Analysis/TypeBasedAliasAnalysis.cpp index 0faf1398ec76..68e43b2cdb63 100644 --- a/lib/Analysis/TypeBasedAliasAnalysis.cpp +++ b/lib/Analysis/TypeBasedAliasAnalysis.cpp @@ -57,12 +57,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/Passes.h" -#include "llvm/Constants.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Metadata.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" using namespace llvm; diff --git a/lib/Analysis/ValueTracking.cpp b/lib/Analysis/ValueTracking.cpp index 3beb373dc5cc..45dcc5e37ecf 100644 --- a/lib/Analysis/ValueTracking.cpp +++ b/lib/Analysis/ValueTracking.cpp @@ -13,21 +13,21 @@ //===----------------------------------------------------------------------===// #include 
"llvm/Analysis/ValueTracking.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/InstructionSimplify.h" -#include "llvm/Constants.h" -#include "llvm/Instructions.h" -#include "llvm/GlobalVariable.h" -#include "llvm/GlobalAlias.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Operator.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/ConstantRange.h" #include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/PatternMatch.h" -#include "llvm/ADT/SmallPtrSet.h" #include using namespace llvm; using namespace llvm::PatternMatch; @@ -58,7 +58,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, // NLZ can't be BitWidth with no sign bit APInt MaskV = APInt::getHighBitsSet(BitWidth, NLZ+1); llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - + // If all of the MaskV bits are known to be zero, then we know the // output top bits are zero, because we now know that the output is // from [0-C]. @@ -84,7 +84,7 @@ static void ComputeMaskedBitsAddSub(bool Add, Value *Op0, Value *Op1, bool NSW, unsigned LHSKnownZeroOut = LHSKnownZero.countTrailingOnes(); llvm::ComputeMaskedBits(Op1, KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); unsigned RHSKnownZeroOut = KnownZero2.countTrailingOnes(); // Determine which operand has more trailing zeros, and use that @@ -266,11 +266,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { Elt = CDS->getElementAsInteger(i); KnownZero &= ~Elt; - KnownOne &= Elt; + KnownOne &= Elt; } return; } - + // The address of an aligned GlobalValue has trailing zeros. if (GlobalValue *GV = dyn_cast(V)) { unsigned Align = GV->getAlignment(); @@ -306,7 +306,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, } return; } - + if (Argument *A = dyn_cast(V)) { unsigned Align = 0; @@ -345,9 +345,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, // If either the LHS or the RHS are Zero, the result is zero. ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // Output known-1 bits are only known if set in both the LHS & RHS. KnownOne &= KnownOne2; // Output known-0 are known to be clear if zero in either the LHS | RHS. 
@@ -357,9 +357,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Or: { ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // Output known-0 bits are only known if clear in both the LHS & RHS. KnownZero &= KnownZero2; // Output known-1 are known to be set if set in either the LHS | RHS. @@ -369,9 +369,9 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::Xor: { ComputeMaskedBits(I->getOperand(1), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(0), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); - + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + // Output known-0 bits are known if clear or set in both the LHS & RHS. APInt KnownZeroOut = (KnownZero & KnownZero2) | (KnownOne & KnownOne2); // Output known-1 are known to be set if set in only one of the LHS, RHS. @@ -407,8 +407,8 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, ComputeMaskedBits(I->getOperand(2), KnownZero, KnownOne, TD, Depth+1); ComputeMaskedBits(I->getOperand(1), KnownZero2, KnownOne2, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); - assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero2 & KnownOne2) == 0 && "Bits known to be one AND zero?"); // Only known if known in both the LHS and RHS. KnownOne &= KnownOne2; @@ -433,7 +433,12 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned SrcBitWidth; // Note that we handle pointer operands here because of inttoptr/ptrtoint // which fall through here. - SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); + if(TD) { + SrcBitWidth = TD->getTypeSizeInBits(SrcTy->getScalarType()); + } else { + SrcBitWidth = SrcTy->getScalarSizeInBits(); + if (!SrcBitWidth) return; + } assert(SrcBitWidth && "SrcBitWidth can't be zero"); KnownZero = KnownZero.zextOrTrunc(SrcBitWidth); @@ -460,11 +465,11 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, case Instruction::SExt: { // Compute the bits in the result that are not present in the input. 
unsigned SrcBitWidth = I->getOperand(0)->getType()->getScalarSizeInBits(); - + KnownZero = KnownZero.trunc(SrcBitWidth); KnownOne = KnownOne.trunc(SrcBitWidth); ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = KnownZero.zext(BitWidth); KnownOne = KnownOne.zext(BitWidth); @@ -481,7 +486,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero <<= ShiftAmt; KnownOne <<= ShiftAmt; KnownZero |= APInt::getLowBitsSet(BitWidth, ShiftAmt); // low bits known 0 @@ -493,10 +498,10 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { // Compute the new bits that are at the top now. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth); - + // Unsigned shift right. ComputeMaskedBits(I->getOperand(0), KnownZero,KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); // high bits known zero. @@ -509,13 +514,13 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (ConstantInt *SA = dyn_cast(I->getOperand(1))) { // Compute the new bits that are at the top now. uint64_t ShiftAmt = SA->getLimitedValue(BitWidth-1); - + // Signed shift right. ComputeMaskedBits(I->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); KnownZero = APIntOps::lshr(KnownZero, ShiftAmt); KnownOne = APIntOps::lshr(KnownOne, ShiftAmt); - + APInt HighBits(APInt::getHighBitsSet(BitWidth, ShiftAmt)); if (KnownZero[BitWidth-ShiftAmt-1]) // New bits are known zero. KnownZero |= HighBits; @@ -559,7 +564,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, if (KnownOne2[BitWidth-1] && ((KnownOne2 & LowBits) != 0)) KnownOne |= ~LowBits; - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); } } @@ -606,7 +611,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, unsigned Align = AI->getAlignment(); if (Align == 0 && TD) Align = TD->getABITypeAlignment(AI->getType()->getElementType()); - + if (Align > 0) KnownZero = APInt::getLowBitsSet(BitWidth, CountTrailingZeros_32(Align)); break; @@ -643,7 +648,7 @@ void llvm::ComputeMaskedBits(Value *V, APInt &KnownZero, APInt &KnownOne, LocalKnownZero.countTrailingOnes())); } } - + KnownZero = APInt::getLowBitsSet(BitWidth, TrailZ); break; } @@ -799,12 +804,11 @@ void llvm::ComputeSignBit(Value *V, bool &KnownZero, bool &KnownOne, KnownZero = ZeroBits[BitWidth - 1]; } -/// isPowerOfTwo - Return true if the given value is known to have exactly one +/// isKnownToBeAPowerOfTwo - Return true if the given value is known to have exactly one /// bit set when defined. 
For vectors return true if every element is known to /// be a power of two when defined. Supports values with integer or pointer /// types and vectors of integers. -bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero, - unsigned Depth) { +bool llvm::isKnownToBeAPowerOfTwo(Value *V, bool OrZero, unsigned Depth) { if (Constant *C = dyn_cast(V)) { if (C->isNullValue()) return OrZero; @@ -831,19 +835,19 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero, // A shift of a power of two is a power of two or zero. if (OrZero && (match(V, m_Shl(m_Value(X), m_Value())) || match(V, m_Shr(m_Value(X), m_Value())))) - return isPowerOfTwo(X, TD, /*OrZero*/true, Depth); + return isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth); if (ZExtInst *ZI = dyn_cast(V)) - return isPowerOfTwo(ZI->getOperand(0), TD, OrZero, Depth); + return isKnownToBeAPowerOfTwo(ZI->getOperand(0), OrZero, Depth); if (SelectInst *SI = dyn_cast(V)) - return isPowerOfTwo(SI->getTrueValue(), TD, OrZero, Depth) && - isPowerOfTwo(SI->getFalseValue(), TD, OrZero, Depth); + return isKnownToBeAPowerOfTwo(SI->getTrueValue(), OrZero, Depth) && + isKnownToBeAPowerOfTwo(SI->getFalseValue(), OrZero, Depth); if (OrZero && match(V, m_And(m_Value(X), m_Value(Y)))) { // A power of two and'd with anything is a power of two or zero. - if (isPowerOfTwo(X, TD, /*OrZero*/true, Depth) || - isPowerOfTwo(Y, TD, /*OrZero*/true, Depth)) + if (isKnownToBeAPowerOfTwo(X, /*OrZero*/true, Depth) || + isKnownToBeAPowerOfTwo(Y, /*OrZero*/true, Depth)) return true; // X & (-X) is always a power of two or zero. if (match(X, m_Neg(m_Specific(Y))) || match(Y, m_Neg(m_Specific(X)))) @@ -856,7 +860,73 @@ bool llvm::isPowerOfTwo(Value *V, const DataLayout *TD, bool OrZero, // copying a sign bit (sdiv int_min, 2). if (match(V, m_Exact(m_LShr(m_Value(), m_Value()))) || match(V, m_Exact(m_UDiv(m_Value(), m_Value())))) { - return isPowerOfTwo(cast(V)->getOperand(0), TD, OrZero, Depth); + return isKnownToBeAPowerOfTwo(cast(V)->getOperand(0), OrZero, Depth); + } + + return false; +} + +/// \brief Test whether a GEP's result is known to be non-null. +/// +/// Uses properties inherent in a GEP to try to determine whether it is known +/// to be non-null. +/// +/// Currently this routine does not support vector GEPs. +static bool isGEPKnownNonNull(GEPOperator *GEP, const DataLayout *DL, + unsigned Depth) { + if (!GEP->isInBounds() || GEP->getPointerAddressSpace() != 0) + return false; + + // FIXME: Support vector-GEPs. + assert(GEP->getType()->isPointerTy() && "We only support plain pointer GEP"); + + // If the base pointer is non-null, we cannot walk to a null address with an + // inbounds GEP in address space zero. + if (isKnownNonZero(GEP->getPointerOperand(), DL, Depth)) + return true; + + // Past this, if we don't have DataLayout, we can't do much. + if (!DL) + return false; + + // Walk the GEP operands and see if any operand introduces a non-zero offset. + // If so, then the GEP cannot produce a null pointer, as doing so would + // inherently violate the inbounds contract within address space zero. + for (gep_type_iterator GTI = gep_type_begin(GEP), GTE = gep_type_end(GEP); + GTI != GTE; ++GTI) { + // Struct types are easy -- they must always be indexed by a constant. 
+ if (StructType *STy = dyn_cast(*GTI)) { + ConstantInt *OpC = cast(GTI.getOperand()); + unsigned ElementIdx = OpC->getZExtValue(); + const StructLayout *SL = DL->getStructLayout(STy); + uint64_t ElementOffset = SL->getElementOffset(ElementIdx); + if (ElementOffset > 0) + return true; + continue; + } + + // If we have a zero-sized type, the index doesn't matter. Keep looping. + if (DL->getTypeAllocSize(GTI.getIndexedType()) == 0) + continue; + + // Fast path the constant operand case both for efficiency and so we don't + // increment Depth when just zipping down an all-constant GEP. + if (ConstantInt *OpC = dyn_cast(GTI.getOperand())) { + if (!OpC->isZero()) + return true; + continue; + } + + // We post-increment Depth here because while isKnownNonZero increments it + // as well, when we pop back up that increment won't persist. We don't want + // to recurse 10k times just because we have 10k GEP operands. We don't + // bail completely out because we want to handle constant GEPs regardless + // of depth. + if (Depth++ >= MaxDepth) + continue; + + if (isKnownNonZero(GTI.getOperand(), DL, Depth)) + return true; } return false; @@ -881,7 +951,16 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { if (Depth++ >= MaxDepth) return false; - unsigned BitWidth = getBitWidth(V->getType(), TD); + // Check for pointer simplifications. + if (V->getType()->isPointerTy()) { + if (isKnownNonNull(V)) + return true; + if (GEPOperator *GEP = dyn_cast(V)) + if (isGEPKnownNonNull(GEP, TD, Depth)) + return true; + } + + unsigned BitWidth = getBitWidth(V->getType()->getScalarType(), TD); // X | Y != 0 if X != 0 or Y != 0. Value *X = 0, *Y = 0; @@ -955,9 +1034,9 @@ bool llvm::isKnownNonZero(Value *V, const DataLayout *TD, unsigned Depth) { } // The sum of a non-negative number and a power of two is not zero. - if (XKnownNonNegative && isPowerOfTwo(Y, TD, /*OrZero*/false, Depth)) + if (XKnownNonNegative && isKnownToBeAPowerOfTwo(Y, /*OrZero*/false, Depth)) return true; - if (YKnownNonNegative && isPowerOfTwo(X, TD, /*OrZero*/false, Depth)) + if (YKnownNonNegative && isKnownToBeAPowerOfTwo(X, /*OrZero*/false, Depth)) return true; } // X * Y. @@ -996,7 +1075,7 @@ bool llvm::MaskedValueIsZero(Value *V, const APInt &Mask, const DataLayout *TD, unsigned Depth) { APInt KnownZero(Mask.getBitWidth(), 0), KnownOne(Mask.getBitWidth(), 0); ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); - assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); + assert((KnownZero & KnownOne) == 0 && "Bits known to be one AND zero?"); return (KnownZero & Mask) == Mask; } @@ -1026,14 +1105,14 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (Depth == 6) return 1; // Limit search depth. - + Operator *U = dyn_cast(V); switch (Operator::getOpcode(V)) { default: break; case Instruction::SExt: Tmp = TyBits - U->getOperand(0)->getType()->getScalarSizeInBits(); return ComputeNumSignBits(U->getOperand(0), TD, Depth+1) + Tmp; - + case Instruction::AShr: { Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); // ashr X, C -> adds C sign bits. Vectors too. @@ -1075,38 +1154,38 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, if (Tmp == 1) return 1; // Early out. Tmp2 = ComputeNumSignBits(U->getOperand(2), TD, Depth+1); return std::min(Tmp, Tmp2); - + case Instruction::Add: // Add can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. 
Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); if (Tmp == 1) return 1; // Early out. - + // Special case decrementing a value (ADD X, -1): if (ConstantInt *CRHS = dyn_cast(U->getOperand(1))) if (CRHS->isAllOnesValue()) { APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); ComputeMaskedBits(U->getOperand(0), KnownZero, KnownOne, TD, Depth+1); - + // If the input is known to be 0 or 1, the output is 0/-1, which is all // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; - + // If we are subtracting one from a positive number, there is no carry // out of the result. if (KnownZero.isNegative()) return Tmp; } - + Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); if (Tmp2 == 1) return 1; return std::min(Tmp, Tmp2)-1; - + case Instruction::Sub: Tmp2 = ComputeNumSignBits(U->getOperand(1), TD, Depth+1); if (Tmp2 == 1) return 1; - + // Handle NEG. if (ConstantInt *CLHS = dyn_cast(U->getOperand(0))) if (CLHS->isNullValue()) { @@ -1116,26 +1195,26 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // sign bits set. if ((KnownZero | APInt(TyBits, 1)).isAllOnesValue()) return TyBits; - + // If the input is known to be positive (the sign bit is known clear), // the output of the NEG has the same number of sign bits as the input. if (KnownZero.isNegative()) return Tmp2; - + // Otherwise, we treat this like a SUB. } - + // Sub can have at most one carry bit. Thus we know that the output // is, at worst, one more bit than the inputs. Tmp = ComputeNumSignBits(U->getOperand(0), TD, Depth+1); if (Tmp == 1) return 1; // Early out. return std::min(Tmp, Tmp2)-1; - + case Instruction::PHI: { PHINode *PN = cast(U); // Don't analyze large in-degree PHIs. if (PN->getNumIncomingValues() > 4) break; - + // Take the minimum of all incoming values. This can't infinitely loop // because of our depth threshold. Tmp = ComputeNumSignBits(PN->getIncomingValue(0), TD, Depth+1); @@ -1152,13 +1231,13 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // case for targets like X86. break; } - + // Finally, if we can prove that the top bits of the result are 0's or 1's, // use this information. APInt KnownZero(TyBits, 0), KnownOne(TyBits, 0); APInt Mask; ComputeMaskedBits(V, KnownZero, KnownOne, TD, Depth); - + if (KnownZero.isNegative()) { // sign bit is 0 Mask = KnownZero; } else if (KnownOne.isNegative()) { // sign bit is 1; @@ -1167,7 +1246,7 @@ unsigned llvm::ComputeNumSignBits(Value *V, const DataLayout *TD, // Nothing known. return FirstAnswer; } - + // Okay, we know that the sign bit in Mask is set. Use CLZ to determine // the number of identical bits in the top of the input value. Mask = ~Mask; @@ -1195,7 +1274,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (Base == 0) return false; - + if (Base == 1) { Multiple = V; return true; @@ -1211,11 +1290,11 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (CI && CI->getZExtValue() % Base == 0) { Multiple = ConstantInt::get(T, CI->getZExtValue() / Base); - return true; + return true; } - + if (Depth == MaxDepth) return false; // Limit search depth. 
- + Operator *I = dyn_cast(V); if (!I) return false; @@ -1247,13 +1326,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (ComputeMultiple(Op0, Base, Mul0, LookThroughSExt, Depth+1)) { if (Constant *Op1C = dyn_cast(Op1)) if (Constant *MulC = dyn_cast(Mul0)) { - if (Op1C->getType()->getPrimitiveSizeInBits() < + if (Op1C->getType()->getPrimitiveSizeInBits() < MulC->getType()->getPrimitiveSizeInBits()) Op1C = ConstantExpr::getZExt(Op1C, MulC->getType()); - if (Op1C->getType()->getPrimitiveSizeInBits() > + if (Op1C->getType()->getPrimitiveSizeInBits() > MulC->getType()->getPrimitiveSizeInBits()) MulC = ConstantExpr::getZExt(MulC, Op1C->getType()); - + // V == Base * (Mul0 * Op1), so return (Mul0 * Op1) Multiple = ConstantExpr::getMul(MulC, Op1C); return true; @@ -1271,13 +1350,13 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, if (ComputeMultiple(Op1, Base, Mul1, LookThroughSExt, Depth+1)) { if (Constant *Op0C = dyn_cast(Op0)) if (Constant *MulC = dyn_cast(Mul1)) { - if (Op0C->getType()->getPrimitiveSizeInBits() < + if (Op0C->getType()->getPrimitiveSizeInBits() < MulC->getType()->getPrimitiveSizeInBits()) Op0C = ConstantExpr::getZExt(Op0C, MulC->getType()); - if (Op0C->getType()->getPrimitiveSizeInBits() > + if (Op0C->getType()->getPrimitiveSizeInBits() > MulC->getType()->getPrimitiveSizeInBits()) MulC = ConstantExpr::getZExt(MulC, Op0C->getType()); - + // V == Base * (Mul1 * Op0), so return (Mul1 * Op0) Multiple = ConstantExpr::getMul(MulC, Op0C); return true; @@ -1297,7 +1376,7 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, return false; } -/// CannotBeNegativeZero - Return true if we can prove that the specified FP +/// CannotBeNegativeZero - Return true if we can prove that the specified FP /// value is never equal to -0.0. /// /// NOTE: this function will need to be revisited when we support non-default @@ -1306,28 +1385,33 @@ bool llvm::ComputeMultiple(Value *V, unsigned Base, Value *&Multiple, bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { if (const ConstantFP *CFP = dyn_cast(V)) return !CFP->getValueAPF().isNegZero(); - + if (Depth == 6) return 1; // Limit search depth. const Operator *I = dyn_cast(V); if (I == 0) return false; - + + // Check if the nsz fast-math flag is set + if (const FPMathOperator *FPO = dyn_cast(I)) + if (FPO->hasNoSignedZeros()) + return true; + // (add x, 0.0) is guaranteed to return +0.0, not -0.0. - if (I->getOpcode() == Instruction::FAdd && - isa(I->getOperand(1)) && - cast(I->getOperand(1))->isNullValue()) - return true; - + if (I->getOpcode() == Instruction::FAdd) + if (ConstantFP *CFP = dyn_cast(I->getOperand(1))) + if (CFP->isNullValue()) + return true; + // sitofp and uitofp turn into +0.0 for zero. if (isa(I) || isa(I)) return true; - + if (const IntrinsicInst *II = dyn_cast(I)) // sqrt(-0.0) = -0.0, no other negative results are possible. 
if (II->getIntrinsicID() == Intrinsic::sqrt) return CannotBeNegativeZero(II->getArgOperand(0), Depth+1); - + if (const CallInst *CI = dyn_cast(I)) if (const Function *F = CI->getCalledFunction()) { if (F->isDeclaration()) { @@ -1342,7 +1426,7 @@ bool llvm::CannotBeNegativeZero(const Value *V, unsigned Depth) { return CannotBeNegativeZero(CI->getArgOperand(0), Depth+1); } } - + return false; } @@ -1359,9 +1443,9 @@ Value *llvm::isBytewiseValue(Value *V) { if (Constant *C = dyn_cast(V)) if (C->isNullValue()) return Constant::getNullValue(Type::getInt8Ty(V->getContext())); - + // Constant float and double values can be handled as integer values if the - // corresponding integer value is "byteable". An important case is 0.0. + // corresponding integer value is "byteable". An important case is 0.0. if (ConstantFP *CFP = dyn_cast(V)) { if (CFP->getType()->isFloatTy()) V = ConstantExpr::getBitCast(CFP, Type::getInt32Ty(V->getContext())); @@ -1369,8 +1453,8 @@ Value *llvm::isBytewiseValue(Value *V) { V = ConstantExpr::getBitCast(CFP, Type::getInt64Ty(V->getContext())); // Don't handle long double formats, which have strange constraints. } - - // We can handle constant integers that are power of two in size and a + + // We can handle constant integers that are power of two in size and a // multiple of 8 bits. if (ConstantInt *CI = dyn_cast(V)) { unsigned Width = CI->getBitWidth(); @@ -1384,7 +1468,7 @@ Value *llvm::isBytewiseValue(Value *V) { Val2 = Val.lshr(NextWidth); Val2 = Val2.trunc(Val.getBitWidth()/2); Val = Val.trunc(Val.getBitWidth()/2); - + // If the top/bottom halves aren't the same, reject it. if (Val != Val2) return 0; @@ -1392,7 +1476,7 @@ Value *llvm::isBytewiseValue(Value *V) { return ConstantInt::get(V->getContext(), Val); } } - + // A ConstantDataArray/Vector is splatable if all its members are equal and // also splatable. if (ConstantDataSequential *CA = dyn_cast(V)) { @@ -1400,11 +1484,11 @@ Value *llvm::isBytewiseValue(Value *V) { Value *Val = isBytewiseValue(Elt); if (!Val) return 0; - + for (unsigned I = 1, E = CA->getNumElements(); I != E; ++I) if (CA->getElementAsConstant(I) != Elt) return 0; - + return Val; } @@ -1428,7 +1512,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, SmallVector &Idxs, unsigned IdxSkip, Instruction *InsertBefore) { - llvm::StructType *STy = llvm::dyn_cast(IndexedType); + llvm::StructType *STy = dyn_cast(IndexedType); if (STy) { // Save the original To argument so we can modify it Value *OrigTo = To; @@ -1459,7 +1543,7 @@ static Value *BuildSubAggregate(Value *From, Value* To, Type *IndexedType, // the struct's elements had a value that was inserted directly. In the latter // case, perhaps we can't determine each of the subelements individually, but // we might be able to find the complete struct somewhere. - + // Find the value that is at that particular spot Value *V = FindInsertedValue(From, Idxs); @@ -1518,7 +1602,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, if (C == 0) return 0; return FindInsertedValue(C, idx_range.slice(1), InsertBefore); } - + if (InsertValueInst *I = dyn_cast(V)) { // Loop the indices for the insertvalue instruction in parallel with the // requested indices @@ -1543,7 +1627,7 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, return BuildSubAggregate(V, makeArrayRef(idx_range.begin(), req_idx), InsertBefore); } - + // This insert value inserts something else than what we are looking for. 
// See if the (aggregrate) value inserted into has the value we are // looking for, then. @@ -1558,26 +1642,26 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, makeArrayRef(req_idx, idx_range.end()), InsertBefore); } - + if (ExtractValueInst *I = dyn_cast(V)) { // If we're extracting a value from an aggregrate that was extracted from // something else, we can extract from that something else directly instead. // However, we will need to chain I's indices with the requested indices. - - // Calculate the number of indices required + + // Calculate the number of indices required unsigned size = I->getNumIndices() + idx_range.size(); // Allocate some space to put the new indices in SmallVector Idxs; Idxs.reserve(size); // Add indices from the extract value instruction Idxs.append(I->idx_begin(), I->idx_end()); - + // Add requested indices Idxs.append(idx_range.begin(), idx_range.end()); - assert(Idxs.size() == size + assert(Idxs.size() == size && "Number of indices added not correct?"); - + return FindInsertedValue(I->getAggregateOperand(), Idxs, InsertBefore); } // Otherwise, we don't know (such as, extracting from a function return value @@ -1589,41 +1673,33 @@ Value *llvm::FindInsertedValue(Value *V, ArrayRef idx_range, /// it can be expressed as a base pointer plus a constant offset. Return the /// base and offset to the caller. Value *llvm::GetPointerBaseWithConstantOffset(Value *Ptr, int64_t &Offset, - const DataLayout &TD) { - Operator *PtrOp = dyn_cast(Ptr); - if (PtrOp == 0 || Ptr->getType()->isVectorTy()) - return Ptr; - - // Just look through bitcasts. - if (PtrOp->getOpcode() == Instruction::BitCast) - return GetPointerBaseWithConstantOffset(PtrOp->getOperand(0), Offset, TD); - - // If this is a GEP with constant indices, we can look through it. - GEPOperator *GEP = dyn_cast(PtrOp); - if (GEP == 0 || !GEP->hasAllConstantIndices()) return Ptr; - - gep_type_iterator GTI = gep_type_begin(GEP); - for (User::op_iterator I = GEP->idx_begin(), E = GEP->idx_end(); I != E; - ++I, ++GTI) { - ConstantInt *OpC = cast(*I); - if (OpC->isZero()) continue; - - // Handle a struct and array indices which add their offset to the pointer. - if (StructType *STy = dyn_cast(*GTI)) { - Offset += TD.getStructLayout(STy)->getElementOffset(OpC->getZExtValue()); + const DataLayout *TD) { + // Without DataLayout, conservatively assume 64-bit offsets, which is + // the widest we support. + unsigned BitWidth = TD ? TD->getPointerSizeInBits() : 64; + APInt ByteOffset(BitWidth, 0); + while (1) { + if (Ptr->getType()->isVectorTy()) + break; + + if (GEPOperator *GEP = dyn_cast(Ptr)) { + APInt GEPOffset(BitWidth, 0); + if (TD && !GEP->accumulateConstantOffset(*TD, GEPOffset)) + break; + ByteOffset += GEPOffset; + Ptr = GEP->getPointerOperand(); + } else if (Operator::getOpcode(Ptr) == Instruction::BitCast) { + Ptr = cast(Ptr)->getOperand(0); + } else if (GlobalAlias *GA = dyn_cast(Ptr)) { + if (GA->mayBeOverridden()) + break; + Ptr = GA->getAliasee(); } else { - uint64_t Size = TD.getTypeAllocSize(GTI.getIndexedType()); - Offset += OpC->getSExtValue()*Size; + break; } } - - // Re-sign extend from the pointer size if needed to get overflow edge cases - // right. 
- unsigned PtrSize = TD.getPointerSizeInBits(); - if (PtrSize < 64) - Offset = SignExtend64(Offset, PtrSize); - - return GetPointerBaseWithConstantOffset(GEP->getPointerOperand(), Offset, TD); + Offset = ByteOffset.getSExtValue(); + return Ptr; } @@ -1636,26 +1712,26 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, // Look through bitcast instructions and geps. V = V->stripPointerCasts(); - + // If the value is a GEP instructionor constant expression, treat it as an // offset. if (const GEPOperator *GEP = dyn_cast(V)) { // Make sure the GEP has exactly three arguments. if (GEP->getNumOperands() != 3) return false; - + // Make sure the index-ee is a pointer to array of i8. PointerType *PT = cast(GEP->getOperand(0)->getType()); ArrayType *AT = dyn_cast(PT->getElementType()); if (AT == 0 || !AT->getElementType()->isIntegerTy(8)) return false; - + // Check to make sure that the first operand of the GEP is an integer and // has value 0 so that we are sure we're indexing into the initializer. const ConstantInt *FirstIdx = dyn_cast(GEP->getOperand(1)); if (FirstIdx == 0 || !FirstIdx->isZero()) return false; - + // If the second index isn't a ConstantInt, then this is a variable index // into the array. If this occurs, we can't say anything meaningful about // the string. @@ -1681,13 +1757,13 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, Str = ""; return true; } - + // Must be a Constant Array const ConstantDataArray *Array = dyn_cast(GV->getInitializer()); if (Array == 0 || !Array->isString()) return false; - + // Get the number of elements in the array uint64_t NumElts = Array->getType()->getArrayNumElements(); @@ -1696,10 +1772,10 @@ bool llvm::getConstantStringInfo(const Value *V, StringRef &Str, if (Offset > NumElts) return false; - + // Skip over 'offset' bytes. Str = Str.substr(Offset); - + if (TrimAtNul) { // Trim off the \0 and anything after it. If the array is not nul // terminated, we just return the whole end of string. The client may know @@ -1753,7 +1829,7 @@ static uint64_t GetStringLengthH(Value *V, SmallPtrSet &PHIs) { if (Len1 != Len2) return 0; return Len1; } - + // Otherwise, see if we can read the string. StringRef StrData; if (!getConstantStringInfo(V, StrData)) @@ -1940,3 +2016,19 @@ bool llvm::isSafeToSpeculativelyExecute(const Value *V, return false; // Misc instructions which have effects } } + +/// isKnownNonNull - Return true if we know that the specified value is never +/// null. +bool llvm::isKnownNonNull(const Value *V) { + // Alloca never returns null, malloc might. + if (isa(V)) return true; + + // A byval argument is never null. + if (const Argument *A = dyn_cast(V)) + return A->hasByValAttr(); + + // Global values are not null unless extern weak. 
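The rewritten GetPointerBaseWithConstantOffset above replaces the recursive GEP walk with a single loop that keeps stripping constant-index GEPs, bitcasts, and non-overridable aliases while accumulating the byte offset. A toy sketch of that loop shape, using a hypothetical Node type in place of llvm::Operator/GEPOperator:

#include <cstdint>
#include <cstdio>

struct Node {
  enum Kind { BasePtr, Cast, ConstGEP } K;
  Node *Inner;        // operand being cast / indexed (0 for BasePtr)
  int64_t GEPOffset;  // constant byte offset, used by ConstGEP nodes only
};

static Node *stripBaseWithOffset(Node *Ptr, int64_t &Offset) {
  int64_t ByteOffset = 0;
  while (true) {
    if (Ptr->K == Node::ConstGEP) {        // fold the GEP's constant offset
      ByteOffset += Ptr->GEPOffset;
      Ptr = Ptr->Inner;
    } else if (Ptr->K == Node::Cast) {     // casts don't move the pointer
      Ptr = Ptr->Inner;
    } else {
      break;                               // reached the underlying object
    }
  }
  Offset = ByteOffset;
  return Ptr;
}

int main() {
  Node Root = {Node::BasePtr, 0, 0};
  Node G1   = {Node::ConstGEP, &Root, 8};
  Node C1   = {Node::Cast, &G1, 0};
  Node G2   = {Node::ConstGEP, &C1, 4};
  int64_t Off = 0;
  stripBaseWithOffset(&G2, Off);
  printf("offset = %lld\n", (long long)Off);   // offset = 12
}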
+ if (const GlobalValue *GV = dyn_cast(V)) + return !GV->hasExternalWeakLinkage(); + return false; +} diff --git a/lib/Archive/Archive.cpp b/lib/Archive/Archive.cpp index 1eab27d3eba3..1f36a00ab086 100644 --- a/lib/Archive/Archive.cpp +++ b/lib/Archive/Archive.cpp @@ -12,15 +12,16 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Bitcode/Archive.h" #include "ArchiveInternals.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/Module.h" +#include "llvm/IR/Module.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" #include "llvm/Support/system_error.h" -#include #include +#include using namespace llvm; // getMemberSize - compute the actual physical size of the file member as seen diff --git a/lib/Archive/ArchiveInternals.h b/lib/Archive/ArchiveInternals.h index 639f5ac2691b..f6c87e899f25 100644 --- a/lib/Archive/ArchiveInternals.h +++ b/lib/Archive/ArchiveInternals.h @@ -14,10 +14,9 @@ #ifndef LIB_ARCHIVE_ARCHIVEINTERNALS_H #define LIB_ARCHIVE_ARCHIVEINTERNALS_H +#include "llvm/ADT/StringExtras.h" #include "llvm/Bitcode/Archive.h" #include "llvm/Support/TimeValue.h" -#include "llvm/ADT/StringExtras.h" - #include #define ARFILE_MAGIC "!\n" ///< magic string diff --git a/lib/Archive/ArchiveReader.cpp b/lib/Archive/ArchiveReader.cpp index 5052495c0d62..14713e692c0f 100644 --- a/lib/Archive/ArchiveReader.cpp +++ b/lib/Archive/ArchiveReader.cpp @@ -11,14 +11,15 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Bitcode/Archive.h" #include "ArchiveInternals.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/Module.h" #include #include -#include using namespace llvm; /// Read a variable-bit-rate encoded unsigned integer @@ -176,7 +177,7 @@ Archive::parseMemberHeader(const char*& At, const char* End, std::string* error) } if (p >= endp) { if (error) - *error = "missing name termiantor in string table"; + *error = "missing name terminator in string table"; return 0; } } else { @@ -325,14 +326,14 @@ Archive::loadArchive(std::string* error) { // Open and completely load the archive file. 
Archive* -Archive::OpenAndLoad(const sys::Path& file, LLVMContext& C, +Archive::OpenAndLoad(const sys::Path& File, LLVMContext& C, std::string* ErrorMessage) { - std::auto_ptr result ( new Archive(file, C)); + OwningPtr result ( new Archive(File, C)); if (result->mapToMemory(ErrorMessage)) - return 0; + return NULL; if (!result->loadArchive(ErrorMessage)) - return 0; - return result.release(); + return NULL; + return result.take(); } // Get all the bitcode modules from the archive @@ -439,15 +440,15 @@ Archive::loadSymbolTable(std::string* ErrorMsg) { } // Open the archive and load just the symbol tables -Archive* Archive::OpenAndLoadSymbols(const sys::Path& file, +Archive* Archive::OpenAndLoadSymbols(const sys::Path& File, LLVMContext& C, std::string* ErrorMessage) { - std::auto_ptr result ( new Archive(file, C) ); + OwningPtr result ( new Archive(File, C) ); if (result->mapToMemory(ErrorMessage)) - return 0; + return NULL; if (!result->loadSymbolTable(ErrorMessage)) - return 0; - return result.release(); + return NULL; + return result.take(); } // Look up one symbol in the symbol table and return the module that defines diff --git a/lib/Archive/ArchiveWriter.cpp b/lib/Archive/ArchiveWriter.cpp index ec6b4b87584b..3eba701c9535 100644 --- a/lib/Archive/ArchiveWriter.cpp +++ b/lib/Archive/ArchiveWriter.cpp @@ -11,18 +11,19 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Bitcode/Archive.h" #include "ArchiveInternals.h" -#include "llvm/Module.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/Bitcode/ReaderWriter.h" +#include "llvm/IR/Module.h" #include "llvm/Support/FileSystem.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/Process.h" #include "llvm/Support/Signals.h" #include "llvm/Support/system_error.h" #include -#include #include +#include using namespace llvm; // Write an integer using variable bit rate encoding. This saves a few bytes diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index a60e4aa41c42..f46383be7e46 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -12,14 +12,15 @@ //===----------------------------------------------------------------------===// #include "LLLexer.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instruction.h" -#include "llvm/LLVMContext.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/ADT/Twine.h" #include "llvm/Assembly/Parser.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include @@ -55,22 +56,12 @@ uint64_t LLLexer::atoull(const char *Buffer, const char *End) { return Result; } -static char parseHexChar(char C) { - if (C >= '0' && C <= '9') - return C-'0'; - if (C >= 'A' && C <= 'F') - return C-'A'+10; - if (C >= 'a' && C <= 'f') - return C-'a'+10; - return 0; -} - uint64_t LLLexer::HexIntToVal(const char *Buffer, const char *End) { uint64_t Result = 0; for (; Buffer != End; ++Buffer) { uint64_t OldRes = Result; Result *= 16; - Result += parseHexChar(*Buffer); + Result += hexDigitValue(*Buffer); if (Result < OldRes) { // Uh, oh, overflow detected!!! 
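OpenAndLoad and OpenAndLoadSymbols above switch from std::auto_ptr to OwningPtr, with take() handing ownership to the caller where release() used to. A minimal sketch of that ownership pattern, using a hypothetical ScopedOwner class rather than the real llvm::OwningPtr:

#include <cstdio>

// A scoped owner: frees the object on early-error returns, and take() hands
// ownership back to the caller on success.  Non-copyable, unlike auto_ptr.
template <typename T> class ScopedOwner {
  T *Ptr;
  ScopedOwner(const ScopedOwner &);            // not copyable
  ScopedOwner &operator=(const ScopedOwner &); // not assignable
public:
  explicit ScopedOwner(T *P = 0) : Ptr(P) {}
  ~ScopedOwner() { delete Ptr; }
  T *operator->() const { return Ptr; }
  T *take() { T *Tmp = Ptr; Ptr = 0; return Tmp; }  // transfer ownership out
};

struct Archive { bool load() { return true; } };

Archive *openAndLoad() {
  ScopedOwner<Archive> Result(new Archive());
  if (!Result->load())
    return 0;            // Result's destructor frees the partial object
  return Result.take();  // success: caller now owns the Archive
}

int main() { delete openAndLoad(); }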
Error("constant bigger than 64 bits detected!"); @@ -86,12 +77,12 @@ void LLLexer::HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<16; i++, Buffer++) { assert(Buffer != End); Pair[0] *= 16; - Pair[0] += parseHexChar(*Buffer); + Pair[0] += hexDigitValue(*Buffer); } Pair[1] = 0; for (int i=0; i<16 && Buffer != End; i++, Buffer++) { Pair[1] *= 16; - Pair[1] += parseHexChar(*Buffer); + Pair[1] += hexDigitValue(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -105,12 +96,12 @@ void LLLexer::FP80HexToIntPair(const char *Buffer, const char *End, for (int i=0; i<4 && Buffer != End; i++, Buffer++) { assert(Buffer != End); Pair[1] *= 16; - Pair[1] += parseHexChar(*Buffer); + Pair[1] += hexDigitValue(*Buffer); } Pair[0] = 0; for (int i=0; i<16; i++, Buffer++) { Pair[0] *= 16; - Pair[0] += parseHexChar(*Buffer); + Pair[0] += hexDigitValue(*Buffer); } if (Buffer != End) Error("constant bigger than 128 bits detected!"); @@ -128,8 +119,10 @@ static void UnEscapeLexed(std::string &Str) { if (BIn < EndBuffer-1 && BIn[1] == '\\') { *BOut++ = '\\'; // Two \ becomes one BIn += 2; - } else if (BIn < EndBuffer-2 && isxdigit(BIn[1]) && isxdigit(BIn[2])) { - *BOut = parseHexChar(BIn[1]) * 16 + parseHexChar(BIn[2]); + } else if (BIn < EndBuffer-2 && + isxdigit(static_cast(BIn[1])) && + isxdigit(static_cast(BIn[2]))) { + *BOut = hexDigitValue(BIn[1]) * 16 + hexDigitValue(BIn[2]); BIn += 3; // Skip over handled chars ++BOut; } else { @@ -144,7 +137,8 @@ static void UnEscapeLexed(std::string &Str) { /// isLabelChar - Return true for [-a-zA-Z$._0-9]. static bool isLabelChar(char C) { - return isalnum(C) || C == '-' || C == '$' || C == '.' || C == '_'; + return isalnum(static_cast(C)) || C == '-' || C == '$' || + C == '.' || C == '_'; } @@ -197,7 +191,7 @@ lltok::Kind LLLexer::LexToken() { switch (CurChar) { default: // Handle letters: [a-zA-Z_] - if (isalpha(CurChar) || CurChar == '_') + if (isalpha(static_cast(CurChar)) || CurChar == '_') return LexIdentifier(); return lltok::Error; @@ -235,6 +229,7 @@ lltok::Kind LLLexer::LexToken() { SkipLineComment(); return LexToken(); case '!': return LexExclaim(); + case '#': return LexHash(); case '0': case '1': case '2': case '3': case '4': case '5': case '6': case '7': case '8': case '9': case '-': @@ -290,8 +285,8 @@ lltok::Kind LLLexer::LexAt() { return lltok::GlobalVar; // Handle GlobalVarID: @[0-9]+ - if (isdigit(CurPtr[0])) { - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + if (isdigit(static_cast(CurPtr[0]))) { + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; uint64_t Val = atoull(TokStart+1, CurPtr); @@ -325,10 +320,12 @@ lltok::Kind LLLexer::ReadString(lltok::Kind kind) { /// ReadVarName - Read the rest of a token containing a variable name. bool LLLexer::ReadVarName() { const char *NameStart = CurPtr; - if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + if (isalpha(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_') { ++CurPtr; - while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + while (isalnum(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' 
|| CurPtr[0] == '_') ++CurPtr; @@ -354,8 +351,8 @@ lltok::Kind LLLexer::LexPercent() { return lltok::LocalVar; // Handle LocalVarID: %[0-9]+ - if (isdigit(CurPtr[0])) { - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + if (isdigit(static_cast(CurPtr[0]))) { + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; uint64_t Val = atoull(TokStart+1, CurPtr); @@ -389,10 +386,12 @@ lltok::Kind LLLexer::LexQuote() { /// ! lltok::Kind LLLexer::LexExclaim() { // Lex a metadata name as a MetadataVar. - if (isalpha(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + if (isalpha(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') { ++CurPtr; - while (isalnum(CurPtr[0]) || CurPtr[0] == '-' || CurPtr[0] == '$' || + while (isalnum(static_cast(CurPtr[0])) || + CurPtr[0] == '-' || CurPtr[0] == '$' || CurPtr[0] == '.' || CurPtr[0] == '_' || CurPtr[0] == '\\') ++CurPtr; @@ -402,7 +401,25 @@ lltok::Kind LLLexer::LexExclaim() { } return lltok::exclaim; } - + +/// LexHash - Lex all tokens that start with a # character: +/// AttrGrpID ::= #[0-9]+ +lltok::Kind LLLexer::LexHash() { + // Handle AttrGrpID: #[0-9]+ + if (isdigit(static_cast(CurPtr[0]))) { + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) + /*empty*/; + + uint64_t Val = atoull(TokStart+1, CurPtr); + if ((unsigned)Val != Val) + Error("invalid value number (too large)!"); + UIntVal = unsigned(Val); + return lltok::AttrGrpID; + } + + return lltok::Error; +} + /// LexIdentifier: Handle several related productions: /// Label [-a-zA-Z$._0-9]+: /// IntegerType i[0-9]+ @@ -415,8 +432,11 @@ lltok::Kind LLLexer::LexIdentifier() { for (; isLabelChar(*CurPtr); ++CurPtr) { // If we decide this is an integer, remember the end of the sequence. - if (!IntEnd && !isdigit(*CurPtr)) IntEnd = CurPtr; - if (!KeywordEnd && !isalnum(*CurPtr) && *CurPtr != '_') KeywordEnd = CurPtr; + if (!IntEnd && !isdigit(static_cast(*CurPtr))) + IntEnd = CurPtr; + if (!KeywordEnd && !isalnum(static_cast(*CurPtr)) && + *CurPtr != '_') + KeywordEnd = CurPtr; } // If we stopped due to a colon, this really is a label. @@ -445,9 +465,11 @@ lltok::Kind LLLexer::LexIdentifier() { CurPtr = KeywordEnd; --StartChar; unsigned Len = CurPtr-StartChar; -#define KEYWORD(STR) \ - if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ - return lltok::kw_##STR; +#define KEYWORD(STR) \ + do { \ + if (Len == strlen(#STR) && !memcmp(StartChar, #STR, strlen(#STR))) \ + return lltok::kw_##STR; \ + } while (0) KEYWORD(true); KEYWORD(false); KEYWORD(declare); KEYWORD(define); @@ -472,6 +494,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(hidden); KEYWORD(protected); KEYWORD(unnamed_addr); + KEYWORD(externally_initialized); KEYWORD(extern_weak); KEYWORD(external); KEYWORD(thread_local); @@ -486,7 +509,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(target); KEYWORD(triple); KEYWORD(unwind); - KEYWORD(deplibs); + KEYWORD(deplibs); // FIXME: Remove in 4.0. 
KEYWORD(datalayout); KEYWORD(volatile); KEYWORD(atomic); @@ -498,6 +521,11 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(seq_cst); KEYWORD(singlethread); + KEYWORD(nnan); + KEYWORD(ninf); + KEYWORD(nsz); + KEYWORD(arcp); + KEYWORD(fast); KEYWORD(nuw); KEYWORD(nsw); KEYWORD(exact); @@ -532,33 +560,39 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(cc); KEYWORD(c); - KEYWORD(signext); - KEYWORD(zeroext); + KEYWORD(attributes); + + KEYWORD(alwaysinline); + KEYWORD(byval); + KEYWORD(inlinehint); KEYWORD(inreg); - KEYWORD(sret); - KEYWORD(nounwind); - KEYWORD(noreturn); + KEYWORD(minsize); + KEYWORD(naked); + KEYWORD(nest); KEYWORD(noalias); + KEYWORD(nobuiltin); KEYWORD(nocapture); - KEYWORD(byval); - KEYWORD(nest); + KEYWORD(noduplicate); + KEYWORD(noimplicitfloat); + KEYWORD(noinline); + KEYWORD(nonlazybind); + KEYWORD(noredzone); + KEYWORD(noreturn); + KEYWORD(nounwind); + KEYWORD(optsize); KEYWORD(readnone); KEYWORD(readonly); - KEYWORD(uwtable); KEYWORD(returns_twice); - - KEYWORD(inlinehint); - KEYWORD(noinline); - KEYWORD(alwaysinline); - KEYWORD(optsize); + KEYWORD(signext); + KEYWORD(sret); KEYWORD(ssp); KEYWORD(sspreq); - KEYWORD(noredzone); - KEYWORD(noimplicitfloat); - KEYWORD(naked); - KEYWORD(nonlazybind); - KEYWORD(address_safety); - KEYWORD(minsize); + KEYWORD(sspstrong); + KEYWORD(sanitize_address); + KEYWORD(sanitize_thread); + KEYWORD(sanitize_memory); + KEYWORD(uwtable); + KEYWORD(zeroext); KEYWORD(type); KEYWORD(opaque); @@ -653,7 +687,8 @@ lltok::Kind LLLexer::LexIdentifier() { // Check for [us]0x[0-9A-Fa-f]+ which are Hexadecimal constant generated by // the CFE to avoid forcing it to deal with 64-bit numbers. if ((TokStart[0] == 'u' || TokStart[0] == 's') && - TokStart[1] == '0' && TokStart[2] == 'x' && isxdigit(TokStart[3])) { + TokStart[1] == '0' && TokStart[2] == 'x' && + isxdigit(static_cast(TokStart[3]))) { int len = CurPtr-TokStart-3; uint32_t bits = len * 4; APInt Tmp(bits, StringRef(TokStart+3, len), 16); @@ -693,13 +728,13 @@ lltok::Kind LLLexer::Lex0x() { Kind = 'J'; } - if (!isxdigit(CurPtr[0])) { + if (!isxdigit(static_cast(CurPtr[0]))) { // Bad token, return it as an error. CurPtr = TokStart+1; return lltok::Error; } - while (isxdigit(CurPtr[0])) + while (isxdigit(static_cast(CurPtr[0]))) ++CurPtr; if (Kind == 'J') { @@ -716,20 +751,21 @@ lltok::Kind LLLexer::Lex0x() { case 'K': // F80HexFPConstant - x87 long double in hexadecimal format (10 bytes) FP80HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(80, Pair)); + APFloatVal = APFloat(APFloat::x87DoubleExtended, APInt(80, Pair)); return lltok::APFloat; case 'L': // F128HexFPConstant - IEEE 128-bit in hexadecimal format (16 bytes) HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(128, Pair), true); + APFloatVal = APFloat(APFloat::IEEEquad, APInt(128, Pair)); return lltok::APFloat; case 'M': // PPC128HexFPConstant - PowerPC 128-bit in hexadecimal format (16 bytes) HexToIntPair(TokStart+3, CurPtr, Pair); - APFloatVal = APFloat(APInt(128, Pair)); + APFloatVal = APFloat(APFloat::PPCDoubleDouble, APInt(128, Pair)); return lltok::APFloat; case 'H': - APFloatVal = APFloat(APInt(16,HexIntToVal(TokStart+3, CurPtr))); + APFloatVal = APFloat(APFloat::IEEEhalf, + APInt(16,HexIntToVal(TokStart+3, CurPtr))); return lltok::APFloat; } } @@ -744,8 +780,9 @@ lltok::Kind LLLexer::Lex0x() { /// HexFP128Constant 0xL[0-9A-Fa-f]+ /// HexPPC128Constant 0xM[0-9A-Fa-f]+ lltok::Kind LLLexer::LexDigitOrNegative() { - // If the letter after the negative is a number, this is probably a label. 
- if (!isdigit(TokStart[0]) && !isdigit(CurPtr[0])) { + // If the letter after the negative is not a number, this is probably a label. + if (!isdigit(static_cast(TokStart[0])) && + !isdigit(static_cast(CurPtr[0]))) { // Okay, this is not a number after the -, it's probably a label. if (const char *End = isLabelTail(CurPtr)) { StrVal.assign(TokStart, End-1); @@ -759,7 +796,7 @@ lltok::Kind LLLexer::LexDigitOrNegative() { // At this point, it is either a label, int or fp constant. // Skip digits, we have at least one. - for (; isdigit(CurPtr[0]); ++CurPtr) + for (; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; // Check to see if this really is a label afterall, e.g. "-1:". @@ -796,13 +833,14 @@ lltok::Kind LLLexer::LexDigitOrNegative() { ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? - while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { - if (isdigit(CurPtr[1]) || - ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + if (isdigit(static_cast(CurPtr[1])) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && + isdigit(static_cast(CurPtr[2])))) { CurPtr += 2; - while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; } } @@ -814,11 +852,11 @@ lltok::Kind LLLexer::LexDigitOrNegative() { lltok::Kind LLLexer::LexPositive() { // If the letter after the negative is a number, this is probably not a // label. - if (!isdigit(CurPtr[0])) + if (!isdigit(static_cast(CurPtr[0]))) return lltok::Error; // Skip digits. - for (++CurPtr; isdigit(CurPtr[0]); ++CurPtr) + for (++CurPtr; isdigit(static_cast(CurPtr[0])); ++CurPtr) /*empty*/; // At this point, we need a '.'. @@ -830,13 +868,14 @@ lltok::Kind LLLexer::LexPositive() { ++CurPtr; // Skip over [0-9]*([eE][-+]?[0-9]+)? 
- while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; if (CurPtr[0] == 'e' || CurPtr[0] == 'E') { - if (isdigit(CurPtr[1]) || - ((CurPtr[1] == '-' || CurPtr[1] == '+') && isdigit(CurPtr[2]))) { + if (isdigit(static_cast(CurPtr[1])) || + ((CurPtr[1] == '-' || CurPtr[1] == '+') && + isdigit(static_cast(CurPtr[2])))) { CurPtr += 2; - while (isdigit(CurPtr[0])) ++CurPtr; + while (isdigit(static_cast(CurPtr[0]))) ++CurPtr; } } diff --git a/lib/AsmParser/LLLexer.h b/lib/AsmParser/LLLexer.h index 09aea5b01825..85703c766b09 100644 --- a/lib/AsmParser/LLLexer.h +++ b/lib/AsmParser/LLLexer.h @@ -15,8 +15,8 @@ #define LIB_ASMPARSER_LLLEXER_H #include "LLToken.h" -#include "llvm/ADT/APSInt.h" #include "llvm/ADT/APFloat.h" +#include "llvm/ADT/APSInt.h" #include "llvm/Support/SourceMgr.h" #include @@ -81,6 +81,7 @@ namespace llvm { lltok::Kind LexPercent(); lltok::Kind LexQuote(); lltok::Kind Lex0x(); + lltok::Kind LexHash(); uint64_t atoull(const char *Buffer, const char *End); uint64_t HexIntToVal(const char *Buffer, const char *End); diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index b24291ffb329..c8da1f8bc661 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #include "LLParser.h" -#include "llvm/AutoUpgrade.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" -#include "llvm/ValueSymbolTable.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/AutoUpgrade.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -52,10 +52,10 @@ bool LLParser::ValidateEndOfModule() { I != E; ++I) { Instruction *Inst = I->first; const std::vector &MDList = I->second; - + for (unsigned i = 0, e = MDList.size(); i != e; ++i) { unsigned SlotNo = MDList[i].MDSlot; - + if (SlotNo >= NumberedMetadata.size() || NumberedMetadata[SlotNo] == 0) return Error(MDList[i].Loc, "use of undefined metadata '!" + Twine(SlotNo) + "'"); @@ -64,8 +64,66 @@ bool LLParser::ValidateEndOfModule() { } ForwardRefInstMetadata.clear(); } - - + + // Handle any function attribute group forward references. + for (std::map >::iterator + I = ForwardRefAttrGroups.begin(), E = ForwardRefAttrGroups.end(); + I != E; ++I) { + Value *V = I->first; + std::vector &Vec = I->second; + AttrBuilder B; + + for (std::vector::iterator VI = Vec.begin(), VE = Vec.end(); + VI != VE; ++VI) + B.merge(NumberedAttrBuilders[*VI]); + + if (Function *Fn = dyn_cast(V)) { + AttributeSet AS = Fn->getAttributes(); + AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); + AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, + AS.getFnAttributes()); + + FnAttrs.merge(B); + + // If the alignment was parsed as an attribute, move to the alignment + // field. 
+ if (FnAttrs.hasAlignmentAttr()) { + Fn->setAlignment(FnAttrs.getAlignment()); + FnAttrs.removeAttribute(Attribute::Alignment); + } + + AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, + AttributeSet::get(Context, + AttributeSet::FunctionIndex, + FnAttrs)); + Fn->setAttributes(AS); + } else if (CallInst *CI = dyn_cast(V)) { + AttributeSet AS = CI->getAttributes(); + AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); + AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, + AS.getFnAttributes()); + FnAttrs.merge(B); + AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, + AttributeSet::get(Context, + AttributeSet::FunctionIndex, + FnAttrs)); + CI->setAttributes(AS); + } else if (InvokeInst *II = dyn_cast(V)) { + AttributeSet AS = II->getAttributes(); + AttrBuilder FnAttrs(AS.getFnAttributes(), AttributeSet::FunctionIndex); + AS = AS.removeAttributes(Context, AttributeSet::FunctionIndex, + AS.getFnAttributes()); + FnAttrs.merge(B); + AS = AS.addAttributes(Context, AttributeSet::FunctionIndex, + AttributeSet::get(Context, + AttributeSet::FunctionIndex, + FnAttrs)); + II->setAttributes(AS); + } else { + llvm_unreachable("invalid object with forward attribute group reference"); + } + } + // If there are entries in ForwardRefBlockAddresses at this point, they are // references after the function was defined. Resolve those now. while (!ForwardRefBlockAddresses.empty()) { @@ -76,19 +134,19 @@ bool LLParser::ValidateEndOfModule() { TheFn = M->getFunction(Fn.StrVal); else if (Fn.UIntVal < NumberedVals.size()) TheFn = dyn_cast(NumberedVals[Fn.UIntVal]); - + if (TheFn == 0) return Error(Fn.Loc, "unknown function referenced by blockaddress"); - + // Resolve all these references. - if (ResolveForwardRefBlockAddresses(TheFn, + if (ResolveForwardRefBlockAddresses(TheFn, ForwardRefBlockAddresses.begin()->second, 0)) return true; - + ForwardRefBlockAddresses.erase(ForwardRefBlockAddresses.begin()); } - + for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) if (NumberedTypes[i].second.isValid()) return Error(NumberedTypes[i].second, @@ -123,7 +181,7 @@ bool LLParser::ValidateEndOfModule() { return false; } -bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn, +bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn, std::vector > &Refs, PerFunctionState *PFS) { // Loop over all the references, resolving them. @@ -141,11 +199,11 @@ bool LLParser::ResolveForwardRefBlockAddresses(Function *TheFn, Res = dyn_cast_or_null( TheFn->getValueSymbolTable().lookup(Refs[i].first.StrVal)); } - + if (Res == 0) return Error(Refs[i].first.Loc, "referenced value is not a basic block"); - + // Get the BlockAddress for this and update references to use it. 
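The block above resolves attribute-group references at end of module: a function, call, or invoke may name #N before the group is defined, so the parser records the numbered references and merges the corresponding AttrBuilders once everything has been parsed. A simplified sketch of that two-phase scheme, with groups modeled as plain string sets instead of AttrBuilders (ParserState, defineGroup, referenceGroup, and resolve are hypothetical names):

#include <cstdio>
#include <map>
#include <set>
#include <string>
#include <vector>

typedef std::set<std::string> AttrGroup;

struct ParserState {
  std::map<unsigned, AttrGroup> NumberedGroups;          // attributes #N = {...}
  std::map<std::string, std::vector<unsigned> > FwdRefs; // function -> group IDs

  void defineGroup(unsigned ID, const AttrGroup &G) { NumberedGroups[ID] = G; }
  void referenceGroup(const std::string &Fn, unsigned ID) {
    FwdRefs[Fn].push_back(ID);
  }

  // End-of-module resolution: fold every referenced group into the
  // function's own attribute set.
  AttrGroup resolve(const std::string &Fn) {
    AttrGroup Result;
    std::vector<unsigned> &IDs = FwdRefs[Fn];
    for (size_t i = 0; i != IDs.size(); ++i) {
      AttrGroup &G = NumberedGroups[IDs[i]];
      Result.insert(G.begin(), G.end());
    }
    return Result;
  }
};

int main() {
  ParserState P;
  P.referenceGroup("foo", 1);      // "define void @foo() #1" seen first
  AttrGroup G;
  G.insert("nounwind");
  G.insert("readonly");
  P.defineGroup(1, G);             // "attributes #1 = { nounwind readonly }"
  AttrGroup R = P.resolve("foo");
  printf("foo has %u attributes\n", (unsigned)R.size());  // 2
}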
BlockAddress *BA = BlockAddress::get(TheFn, Res); Refs[i].second->replaceAllUsesWith(BA); @@ -174,7 +232,7 @@ bool LLParser::ParseTopLevelEntities() { case lltok::GlobalID: if (ParseUnnamedGlobal()) return true; break; case lltok::GlobalVar: if (ParseNamedGlobal()) return true; break; case lltok::exclaim: if (ParseStandaloneMetadata()) return true; break; - case lltok::MetadataVar: if (ParseNamedMetadata()) return true; break; + case lltok::MetadataVar:if (ParseNamedMetadata()) return true; break; // The Global variable production with no name can have many different // optional leading prefixes, the production is: @@ -220,6 +278,8 @@ bool LLParser::ParseTopLevelEntities() { case lltok::kw_global: // GlobalType if (ParseGlobal("", SMLoc(), 0, false, 0)) return true; break; + + case lltok::kw_attributes: if (ParseUnnamedAttrGrp()) return true; break; } } } @@ -267,6 +327,7 @@ bool LLParser::ParseTargetDefinition() { /// toplevelentity /// ::= 'deplibs' '=' '[' ']' /// ::= 'deplibs' '=' '[' STRINGCONSTANT (',' STRINGCONSTANT)* ']' +/// FIXME: Remove in 4.0. Currently parse, but ignore. bool LLParser::ParseDepLibs() { assert(Lex.getKind() == lltok::kw_deplibs); Lex.Lex(); @@ -277,14 +338,10 @@ bool LLParser::ParseDepLibs() { if (EatIfPresent(lltok::rsquare)) return false; - std::string Str; - if (ParseStringConstant(Str)) return true; - M->addLibrary(Str); - - while (EatIfPresent(lltok::comma)) { + do { + std::string Str; if (ParseStringConstant(Str)) return true; - M->addLibrary(Str); - } + } while (EatIfPresent(lltok::comma)); return ParseToken(lltok::rsquare, "expected ']' at end of list"); } @@ -302,11 +359,11 @@ bool LLParser::ParseUnnamedType() { if (TypeID >= NumberedTypes.size()) NumberedTypes.resize(TypeID+1); - + Type *Result = 0; if (ParseStructDefinition(TypeLoc, "", NumberedTypes[TypeID], Result)) return true; - + if (!isa(Result)) { std::pair &Entry = NumberedTypes[TypeID]; if (Entry.first) @@ -329,11 +386,11 @@ bool LLParser::ParseNamedType() { if (ParseToken(lltok::equal, "expected '=' after name") || ParseToken(lltok::kw_type, "expected 'type' after name")) return true; - + Type *Result = 0; if (ParseStructDefinition(NameLoc, Name, NamedTypes[Name], Result)) return true; - + if (!isa(Result)) { std::pair &Entry = NamedTypes[Name]; if (Entry.first) @@ -341,7 +398,7 @@ bool LLParser::ParseNamedType() { Entry.first = Result; Entry.second = SMLoc(); } - + return false; } @@ -473,7 +530,7 @@ bool LLParser::ParseMDNodeID(MDNode *&Result) { // Otherwise, create MDNode forward reference. MDNode *FwdNode = MDNode::getTemporary(Context, ArrayRef()); ForwardRefMDNodes[MID] = std::make_pair(FwdNode, Lex.getLoc()); - + if (NumberedMetadata.size() <= MID) NumberedMetadata.resize(MID+1); NumberedMetadata[MID] = FwdNode; @@ -498,7 +555,7 @@ bool LLParser::ParseNamedMetadata() { do { if (ParseToken(lltok::exclaim, "Expected '!' here")) return true; - + MDNode *N = 0; if (ParseMDNodeID(N)) return true; NMD->addOperand(N); @@ -530,7 +587,7 @@ bool LLParser::ParseStandaloneMetadata() { return true; MDNode *Init = MDNode::get(Context, Elts); - + // See if this was forward referenced, if so, handle it. 
std::map, LocTy> >::iterator FI = ForwardRefMDNodes.find(MetadataID); @@ -539,7 +596,7 @@ bool LLParser::ParseStandaloneMetadata() { Temp->replaceAllUsesWith(Init); MDNode::deleteTemporary(Temp); ForwardRefMDNodes.erase(FI); - + assert(NumberedMetadata[MetadataID] == Init && "Tracking VH didn't work"); } else { if (MetadataID >= NumberedMetadata.size()) @@ -635,9 +692,11 @@ bool LLParser::ParseAlias(const std::string &Name, LocTy NameLoc, /// ParseGlobal /// ::= GlobalVar '=' OptionalLinkage OptionalVisibility OptionalThreadLocal -/// OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const +/// OptionalAddrSpace OptionalUnNammedAddr +/// OptionalExternallyInitialized GlobalType Type Const /// ::= OptionalLinkage OptionalVisibility OptionalThreadLocal -/// OptionalAddrSpace OptionalUnNammedAddr GlobalType Type Const +/// OptionalAddrSpace OptionalUnNammedAddr +/// OptionalExternallyInitialized GlobalType Type Const /// /// Everything through visibility has been parsed already. /// @@ -645,9 +704,10 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, unsigned Linkage, bool HasLinkage, unsigned Visibility) { unsigned AddrSpace; - bool IsConstant, UnnamedAddr; + bool IsConstant, UnnamedAddr, IsExternallyInitialized; GlobalVariable::ThreadLocalMode TLM; LocTy UnnamedAddrLoc; + LocTy IsExternallyInitializedLoc; LocTy TyLoc; Type *Ty = 0; @@ -655,6 +715,9 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, ParseOptionalAddrSpace(AddrSpace) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || + ParseOptionalToken(lltok::kw_externally_initialized, + IsExternallyInitialized, + &IsExternallyInitializedLoc) || ParseGlobalType(IsConstant) || ParseType(Ty, TyLoc)) return true; @@ -712,6 +775,7 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, GV->setConstant(IsConstant); GV->setLinkage((GlobalValue::LinkageTypes)Linkage); GV->setVisibility((GlobalValue::VisibilityTypes)Visibility); + GV->setExternallyInitialized(IsExternallyInitialized); GV->setThreadLocalMode(TLM); GV->setUnnamedAddr(UnnamedAddr); @@ -736,6 +800,159 @@ bool LLParser::ParseGlobal(const std::string &Name, LocTy NameLoc, return false; } +/// ParseUnnamedAttrGrp +/// ::= 'attributes' AttrGrpID '=' '{' AttrValPair+ '}' +bool LLParser::ParseUnnamedAttrGrp() { + assert(Lex.getKind() == lltok::kw_attributes); + LocTy AttrGrpLoc = Lex.getLoc(); + Lex.Lex(); + + assert(Lex.getKind() == lltok::AttrGrpID); + unsigned VarID = Lex.getUIntVal(); + std::vector unused; + LocTy NoBuiltinLoc; + Lex.Lex(); + + if (ParseToken(lltok::equal, "expected '=' here") || + ParseToken(lltok::lbrace, "expected '{' here") || + ParseFnAttributeValuePairs(NumberedAttrBuilders[VarID], unused, true, + NoBuiltinLoc) || + ParseToken(lltok::rbrace, "expected end of attribute group")) + return true; + + if (!NumberedAttrBuilders[VarID].hasAttributes()) + return Error(AttrGrpLoc, "attribute group has no attributes"); + + return false; +} + +/// ParseFnAttributeValuePairs +/// ::= | '=' +bool LLParser::ParseFnAttributeValuePairs(AttrBuilder &B, + std::vector &FwdRefAttrGrps, + bool inAttrGrp, LocTy &NoBuiltinLoc) { + bool HaveError = false; + + B.clear(); + + while (true) { + lltok::Kind Token = Lex.getKind(); + if (Token == lltok::kw_nobuiltin) + NoBuiltinLoc = Lex.getLoc(); + switch (Token) { + default: + if (!inAttrGrp) return HaveError; + return Error(Lex.getLoc(), "unterminated attribute group"); + case lltok::rbrace: + // Finished. 
+ return false; + + case lltok::AttrGrpID: { + // Allow a function to reference an attribute group: + // + // define void @foo() #1 { ... } + if (inAttrGrp) + HaveError |= + Error(Lex.getLoc(), + "cannot have an attribute group reference in an attribute group"); + + unsigned AttrGrpNum = Lex.getUIntVal(); + if (inAttrGrp) break; + + // Save the reference to the attribute group. We'll fill it in later. + FwdRefAttrGrps.push_back(AttrGrpNum); + break; + } + // Target-dependent attributes: + case lltok::StringConstant: { + std::string Attr = Lex.getStrVal(); + Lex.Lex(); + std::string Val; + if (EatIfPresent(lltok::equal) && + ParseStringConstant(Val)) + return true; + + B.addAttribute(Attr, Val); + continue; + } + + // Target-independent attributes: + case lltok::kw_align: { + // As a hack, we allow "align 2" on functions as a synonym for "alignstack + // 2". + unsigned Alignment; + if (inAttrGrp) { + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' here") || + ParseUInt32(Alignment)) + return true; + } else { + if (ParseOptionalAlignment(Alignment)) + return true; + } + B.addAlignmentAttr(Alignment); + continue; + } + case lltok::kw_alignstack: { + unsigned Alignment; + if (inAttrGrp) { + Lex.Lex(); + if (ParseToken(lltok::equal, "expected '=' here") || + ParseUInt32(Alignment)) + return true; + } else { + if (ParseOptionalStackAlignment(Alignment)) + return true; + } + B.addStackAlignmentAttr(Alignment); + continue; + } + case lltok::kw_alwaysinline: B.addAttribute(Attribute::AlwaysInline); break; + case lltok::kw_inlinehint: B.addAttribute(Attribute::InlineHint); break; + case lltok::kw_minsize: B.addAttribute(Attribute::MinSize); break; + case lltok::kw_naked: B.addAttribute(Attribute::Naked); break; + case lltok::kw_nobuiltin: B.addAttribute(Attribute::NoBuiltin); break; + case lltok::kw_noduplicate: B.addAttribute(Attribute::NoDuplicate); break; + case lltok::kw_noimplicitfloat: B.addAttribute(Attribute::NoImplicitFloat); break; + case lltok::kw_noinline: B.addAttribute(Attribute::NoInline); break; + case lltok::kw_nonlazybind: B.addAttribute(Attribute::NonLazyBind); break; + case lltok::kw_noredzone: B.addAttribute(Attribute::NoRedZone); break; + case lltok::kw_noreturn: B.addAttribute(Attribute::NoReturn); break; + case lltok::kw_nounwind: B.addAttribute(Attribute::NoUnwind); break; + case lltok::kw_optsize: B.addAttribute(Attribute::OptimizeForSize); break; + case lltok::kw_readnone: B.addAttribute(Attribute::ReadNone); break; + case lltok::kw_readonly: B.addAttribute(Attribute::ReadOnly); break; + case lltok::kw_returns_twice: B.addAttribute(Attribute::ReturnsTwice); break; + case lltok::kw_ssp: B.addAttribute(Attribute::StackProtect); break; + case lltok::kw_sspreq: B.addAttribute(Attribute::StackProtectReq); break; + case lltok::kw_sspstrong: B.addAttribute(Attribute::StackProtectStrong); break; + case lltok::kw_sanitize_address: B.addAttribute(Attribute::SanitizeAddress); break; + case lltok::kw_sanitize_thread: B.addAttribute(Attribute::SanitizeThread); break; + case lltok::kw_sanitize_memory: B.addAttribute(Attribute::SanitizeMemory); break; + case lltok::kw_uwtable: B.addAttribute(Attribute::UWTable); break; + + // Error handling. 
+ case lltok::kw_inreg: + case lltok::kw_signext: + case lltok::kw_zeroext: + HaveError |= + Error(Lex.getLoc(), + "invalid use of attribute on a function"); + break; + case lltok::kw_byval: + case lltok::kw_nest: + case lltok::kw_noalias: + case lltok::kw_nocapture: + case lltok::kw_sret: + HaveError |= + Error(Lex.getLoc(), + "invalid use of parameter-only attribute on a function"); + break; + } + + Lex.Lex(); + } +} //===----------------------------------------------------------------------===// // GlobalValue Reference/Resolution Routines. @@ -915,11 +1132,8 @@ bool LLParser::ParseOptionalAddrSpace(unsigned &AddrSpace) { ParseToken(lltok::rparen, "expected ')' in address space"); } -/// ParseOptionalAttrs - Parse a potentially empty attribute list. AttrKind -/// indicates what kind of attribute list this is: 0: function arg, 1: result, -/// 2: function attr. -bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) { - LocTy AttrLoc = Lex.getLoc(); +/// ParseOptionalParamAttrs - Parse a potentially empty list of parameter attributes. +bool LLParser::ParseOptionalParamAttrs(AttrBuilder &B) { bool HaveError = false; B.clear(); @@ -929,42 +1143,6 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned AttrKind) { switch (Token) { default: // End of attributes. return HaveError; - case lltok::kw_zeroext: B.addAttribute(Attributes::ZExt); break; - case lltok::kw_signext: B.addAttribute(Attributes::SExt); break; - case lltok::kw_inreg: B.addAttribute(Attributes::InReg); break; - case lltok::kw_sret: B.addAttribute(Attributes::StructRet); break; - case lltok::kw_noalias: B.addAttribute(Attributes::NoAlias); break; - case lltok::kw_nocapture: B.addAttribute(Attributes::NoCapture); break; - case lltok::kw_byval: B.addAttribute(Attributes::ByVal); break; - case lltok::kw_nest: B.addAttribute(Attributes::Nest); break; - - case lltok::kw_noreturn: B.addAttribute(Attributes::NoReturn); break; - case lltok::kw_nounwind: B.addAttribute(Attributes::NoUnwind); break; - case lltok::kw_uwtable: B.addAttribute(Attributes::UWTable); break; - case lltok::kw_returns_twice: B.addAttribute(Attributes::ReturnsTwice); break; - case lltok::kw_noinline: B.addAttribute(Attributes::NoInline); break; - case lltok::kw_readnone: B.addAttribute(Attributes::ReadNone); break; - case lltok::kw_readonly: B.addAttribute(Attributes::ReadOnly); break; - case lltok::kw_inlinehint: B.addAttribute(Attributes::InlineHint); break; - case lltok::kw_alwaysinline: B.addAttribute(Attributes::AlwaysInline); break; - case lltok::kw_optsize: B.addAttribute(Attributes::OptimizeForSize); break; - case lltok::kw_ssp: B.addAttribute(Attributes::StackProtect); break; - case lltok::kw_sspreq: B.addAttribute(Attributes::StackProtectReq); break; - case lltok::kw_noredzone: B.addAttribute(Attributes::NoRedZone); break; - case lltok::kw_noimplicitfloat: B.addAttribute(Attributes::NoImplicitFloat); break; - case lltok::kw_naked: B.addAttribute(Attributes::Naked); break; - case lltok::kw_nonlazybind: B.addAttribute(Attributes::NonLazyBind); break; - case lltok::kw_address_safety: B.addAttribute(Attributes::AddressSafety); break; - case lltok::kw_minsize: B.addAttribute(Attributes::MinSize); break; - - case lltok::kw_alignstack: { - unsigned Alignment; - if (ParseOptionalStackAlignment(Alignment)) - return true; - B.addStackAlignmentAttr(Alignment); - continue; - } - case lltok::kw_align: { unsigned Alignment; if (ParseOptionalAlignment(Alignment)) @@ -972,51 +1150,70 @@ bool LLParser::ParseOptionalAttrs(AttrBuilder &B, unsigned 
AttrKind) { B.addAlignmentAttr(Alignment); continue; } - + case lltok::kw_byval: B.addAttribute(Attribute::ByVal); break; + case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; + case lltok::kw_nest: B.addAttribute(Attribute::Nest); break; + case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; + case lltok::kw_nocapture: B.addAttribute(Attribute::NoCapture); break; + case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; + case lltok::kw_sret: B.addAttribute(Attribute::StructRet); break; + case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; + + case lltok::kw_alignstack: case lltok::kw_nounwind: + case lltok::kw_alwaysinline: case lltok::kw_optsize: + case lltok::kw_inlinehint: case lltok::kw_readnone: + case lltok::kw_minsize: case lltok::kw_readonly: + case lltok::kw_naked: case lltok::kw_returns_twice: + case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: + case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_memory: + case lltok::kw_noinline: case lltok::kw_sanitize_thread: + case lltok::kw_nonlazybind: case lltok::kw_ssp: + case lltok::kw_noredzone: case lltok::kw_sspreq: + case lltok::kw_noreturn: case lltok::kw_uwtable: + HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); + break; } - // Perform some error checking. - switch (Token) { - default: - if (AttrKind == 2) - HaveError |= Error(AttrLoc, "invalid use of attribute on a function"); - break; - case lltok::kw_align: - // As a hack, we allow "align 2" on functions as a synonym for - // "alignstack 2". - break; + Lex.Lex(); + } +} - // Parameter Only: - case lltok::kw_sret: - case lltok::kw_nocapture: - case lltok::kw_byval: - case lltok::kw_nest: - if (AttrKind != 0) - HaveError |= Error(AttrLoc, "invalid use of parameter-only attribute"); +/// ParseOptionalReturnAttrs - Parse a potentially empty list of return attributes. +bool LLParser::ParseOptionalReturnAttrs(AttrBuilder &B) { + bool HaveError = false; + + B.clear(); + + while (1) { + lltok::Kind Token = Lex.getKind(); + switch (Token) { + default: // End of attributes. + return HaveError; + case lltok::kw_inreg: B.addAttribute(Attribute::InReg); break; + case lltok::kw_noalias: B.addAttribute(Attribute::NoAlias); break; + case lltok::kw_signext: B.addAttribute(Attribute::SExt); break; + case lltok::kw_zeroext: B.addAttribute(Attribute::ZExt); break; + + // Error handling. 
+ case lltok::kw_sret: case lltok::kw_nocapture: + case lltok::kw_byval: case lltok::kw_nest: + HaveError |= Error(Lex.getLoc(), "invalid use of parameter-only attribute"); break; - // Function Only: - case lltok::kw_noreturn: - case lltok::kw_nounwind: - case lltok::kw_readnone: - case lltok::kw_readonly: - case lltok::kw_noinline: - case lltok::kw_alwaysinline: - case lltok::kw_optsize: - case lltok::kw_ssp: - case lltok::kw_sspreq: - case lltok::kw_noredzone: - case lltok::kw_noimplicitfloat: - case lltok::kw_naked: - case lltok::kw_inlinehint: - case lltok::kw_alignstack: - case lltok::kw_uwtable: - case lltok::kw_nonlazybind: - case lltok::kw_returns_twice: - case lltok::kw_address_safety: - case lltok::kw_minsize: - if (AttrKind != 2) - HaveError |= Error(AttrLoc, "invalid use of function-only attribute"); + case lltok::kw_align: case lltok::kw_noreturn: + case lltok::kw_alignstack: case lltok::kw_nounwind: + case lltok::kw_alwaysinline: case lltok::kw_optsize: + case lltok::kw_inlinehint: case lltok::kw_readnone: + case lltok::kw_minsize: case lltok::kw_readonly: + case lltok::kw_naked: case lltok::kw_returns_twice: + case lltok::kw_nobuiltin: case lltok::kw_sanitize_address: + case lltok::kw_noduplicate: case lltok::kw_sanitize_memory: + case lltok::kw_noimplicitfloat: case lltok::kw_sanitize_thread: + case lltok::kw_noinline: case lltok::kw_ssp: + case lltok::kw_nonlazybind: case lltok::kw_sspreq: + case lltok::kw_noredzone: case lltok::kw_sspstrong: + case lltok::kw_uwtable: + HaveError |= Error(Lex.getLoc(), "invalid use of function-only attribute"); break; } @@ -1207,7 +1404,7 @@ bool LLParser::ParseOptionalAlignment(unsigned &Alignment) { } /// ParseOptionalCommaAlign -/// ::= +/// ::= /// ::= ',' align 4 /// /// This returns with AteExtraComma set to true if it ate an excess comma at the @@ -1221,7 +1418,7 @@ bool LLParser::ParseOptionalCommaAlign(unsigned &Alignment, AteExtraComma = true; return false; } - + if (Lex.getKind() != lltok::kw_align) return Error(Lex.getLoc(), "expected metadata or 'align'"); @@ -1289,7 +1486,7 @@ bool LLParser::ParseOptionalStackAlignment(unsigned &Alignment) { bool LLParser::ParseIndexList(SmallVectorImpl &Indices, bool &AteExtraComma) { AteExtraComma = false; - + if (Lex.getKind() != lltok::comma) return TokError("expected ',' as start of index list"); @@ -1345,7 +1542,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) { case lltok::LocalVar: { // Type ::= %foo std::pair &Entry = NamedTypes[Lex.getStrVal()]; - + // If the type hasn't been defined yet, create a forward definition and // remember where that forward def'n was seen (in case it never is defined). if (Entry.first == 0) { @@ -1362,7 +1559,7 @@ bool LLParser::ParseType(Type *&Result, bool AllowVoid) { if (Lex.getUIntVal() >= NumberedTypes.size()) NumberedTypes.resize(Lex.getUIntVal()+1); std::pair &Entry = NumberedTypes[Lex.getUIntVal()]; - + // If the type hasn't been defined yet, create a forward definition and // remember where that forward def'n was seen (in case it never is defined). if (Entry.first == 0) { @@ -1432,6 +1629,7 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, if (ParseToken(lltok::lparen, "expected '(' in call")) return true; + unsigned AttrIndex = 1; while (Lex.getKind() != lltok::rparen) { // If this isn't the first argument, we need a comma. if (!ArgList.empty() && @@ -1447,10 +1645,11 @@ bool LLParser::ParseParameterList(SmallVectorImpl &ArgList, return true; // Otherwise, handle normal operands. 
- if (ParseOptionalAttrs(ArgAttrs, 0) || ParseValue(ArgTy, V, PFS)) + if (ParseOptionalParamAttrs(ArgAttrs) || ParseValue(ArgTy, V, PFS)) return true; - ArgList.push_back(ParamInfo(ArgLoc, V, Attributes::get(V->getContext(), - ArgAttrs))); + ArgList.push_back(ParamInfo(ArgLoc, V, AttributeSet::get(V->getContext(), + AttrIndex++, + ArgAttrs))); } Lex.Lex(); // Lex the ')'. @@ -1486,7 +1685,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, std::string Name; if (ParseType(ArgTy) || - ParseOptionalAttrs(Attrs, 0)) return true; + ParseOptionalParamAttrs(Attrs)) return true; if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); @@ -1499,9 +1698,10 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, if (!FunctionType::isValidArgumentType(ArgTy)) return Error(TypeLoc, "invalid type for function argument"); + unsigned AttrIndex = 1; ArgList.push_back(ArgInfo(TypeLoc, ArgTy, - Attributes::get(ArgTy->getContext(), - Attrs), Name)); + AttributeSet::get(ArgTy->getContext(), + AttrIndex++, Attrs), Name)); while (EatIfPresent(lltok::comma)) { // Handle ... at end of arg list. @@ -1512,7 +1712,7 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, // Otherwise must be an argument type. TypeLoc = Lex.getLoc(); - if (ParseType(ArgTy) || ParseOptionalAttrs(Attrs, 0)) return true; + if (ParseType(ArgTy) || ParseOptionalParamAttrs(Attrs)) return true; if (ArgTy->isVoidTy()) return Error(TypeLoc, "argument can not have void type"); @@ -1528,7 +1728,8 @@ bool LLParser::ParseArgumentList(SmallVectorImpl &ArgList, return Error(TypeLoc, "invalid type for function argument"); ArgList.push_back(ArgInfo(TypeLoc, ArgTy, - Attributes::get(ArgTy->getContext(), Attrs), + AttributeSet::get(ArgTy->getContext(), + AttrIndex++, Attrs), Name)); } } @@ -1553,7 +1754,7 @@ bool LLParser::ParseFunctionType(Type *&Result) { for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { if (!ArgList[i].Name.empty()) return Error(ArgList[i].Loc, "argument name invalid in function type"); - if (ArgList[i].Attrs.hasAttributes()) + if (ArgList[i].Attrs.hasAttributes(i + 1)) return Error(ArgList[i].Loc, "argument attributes invalid in function type"); } @@ -1571,7 +1772,7 @@ bool LLParser::ParseFunctionType(Type *&Result) { bool LLParser::ParseAnonStructType(Type *&Result, bool Packed) { SmallVector Elts; if (ParseStructBody(Elts)) return true; - + Result = StructType::get(Context, Elts, Packed); return false; } @@ -1583,20 +1784,20 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, // If the type was already defined, diagnose the redefinition. if (Entry.first && !Entry.second.isValid()) return Error(TypeLoc, "redefinition of type"); - + // If we have opaque, just return without filling in the definition for the // struct. This counts as a definition as far as the .ll file goes. if (EatIfPresent(lltok::kw_opaque)) { // This type is being defined, so clear the location to indicate this. Entry.second = SMLoc(); - + // If this type number has never been uttered, create it. if (Entry.first == 0) Entry.first = StructType::create(Context, Name); ResultTy = Entry.first; return false; } - + // If the type starts with '<', then it is either a packed struct or a vector. 
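ParseParameterList and ParseArgumentList above now key each attribute set by a 1-based parameter index, with separate reserved slots for the return value and the function itself. A rough sketch of that indexed layout with plain string sets; the constants and helper here are illustrative, not the real AttributeSet API:

#include <cstdio>
#include <map>
#include <set>
#include <string>

// Hypothetical index constants mirroring the convention used above:
// slot 0 for the return value, slot ~0 for the function, 1..N for parameters.
enum { ReturnSlot = 0U, FunctionSlot = ~0U };

typedef std::map<unsigned, std::set<std::string> > IndexedAttrs;

static void addAttr(IndexedAttrs &A, unsigned Index, const std::string &Name) {
  A[Index].insert(Name);
}

int main() {
  IndexedAttrs A;
  addAttr(A, ReturnSlot, "zeroext");     // return attribute
  addAttr(A, 1, "sret");                 // first parameter
  addAttr(A, 2, "nocapture");            // second parameter
  addAttr(A, FunctionSlot, "nounwind");  // function attribute
  printf("param 1 has sret: %d\n", (int)A[1].count("sret"));  // 1
}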
bool isPacked = EatIfPresent(lltok::less); @@ -1606,27 +1807,27 @@ bool LLParser::ParseStructDefinition(SMLoc TypeLoc, StringRef Name, if (Lex.getKind() != lltok::lbrace) { if (Entry.first) return Error(TypeLoc, "forward references to non-struct type"); - + ResultTy = 0; if (isPacked) return ParseArrayVectorType(ResultTy, true); return ParseType(ResultTy); } - + // This type is being defined, so clear the location to indicate this. Entry.second = SMLoc(); - + // If this type number has never been uttered, create it. if (Entry.first == 0) Entry.first = StructType::create(Context, Name); - + StructType *STy = cast(Entry.first); - + SmallVector Body; if (ParseStructBody(Body) || (isPacked && ParseToken(lltok::greater, "expected '>' in packed struct"))) return true; - + STy->setBody(Body, isPacked); ResultTy = STy; return false; @@ -1699,8 +1900,7 @@ bool LLParser::ParseArrayVectorType(Type *&Result, bool isVector) { if ((unsigned)Size != Size) return Error(SizeLoc, "size too large for vector"); if (!VectorType::isValidElementType(EltTy)) - return Error(TypeLoc, - "vector element type must be fp, integer or a pointer to these types"); + return Error(TypeLoc, "invalid vector element type"); Result = VectorType::get(EltTy, unsigned(Size)); } else { if (!ArrayType::isValidElementType(EltTy)) @@ -1757,18 +1957,18 @@ bool LLParser::PerFunctionState::FinishFunction() { FunctionID.Kind = ValID::t_GlobalID; FunctionID.UIntVal = FunctionNumber; } - + std::map > >::iterator FRBAI = P.ForwardRefBlockAddresses.find(FunctionID); if (FRBAI != P.ForwardRefBlockAddresses.end()) { // Resolve all these references. if (P.ResolveForwardRefBlockAddresses(&F, FRBAI->second, this)) return true; - + P.ForwardRefBlockAddresses.erase(FRBAI); } } - + if (!ForwardRefVals.empty()) return P.Error(ForwardRefVals.begin()->second.second, "use of undefined value '%" + ForwardRefVals.begin()->first + @@ -2118,7 +2318,8 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { return false; case lltok::kw_asm: { - // ValID ::= 'asm' SideEffect? AlignStack? STRINGCONSTANT ',' STRINGCONSTANT + // ValID ::= 'asm' SideEffect? AlignStack? IntelDialect? STRINGCONSTANT ',' + // STRINGCONSTANT bool HasSideEffect, AlignStack, AsmDialect; Lex.Lex(); if (ParseOptionalToken(lltok::kw_sideeffect, HasSideEffect) || @@ -2141,19 +2342,19 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ValID Fn, Label; LocTy FnLoc, LabelLoc; - + if (ParseToken(lltok::lparen, "expected '(' in block address expression") || ParseValID(Fn) || ParseToken(lltok::comma, "expected comma in block address expression")|| ParseValID(Label) || ParseToken(lltok::rparen, "expected ')' in block address expression")) return true; - + if (Fn.Kind != ValID::t_GlobalID && Fn.Kind != ValID::t_GlobalName) return Error(Fn.Loc, "expected function name in blockaddress"); if (Label.Kind != ValID::t_LocalID && Label.Kind != ValID::t_LocalName) return Error(Label.Loc, "expected basic block name in blockaddress"); - + // Make a global variable as a placeholder for this reference. 
GlobalVariable *FwdRef = new GlobalVariable(*M, Type::getInt8Ty(Context), false, GlobalValue::InternalLinkage, @@ -2163,7 +2364,7 @@ bool LLParser::ParseValID(ValID &ID, PerFunctionState *PFS) { ID.Kind = ValID::t_Constant; return false; } - + case lltok::kw_trunc: case lltok::kw_zext: case lltok::kw_sext: @@ -2543,7 +2744,7 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, return (V == 0); case ValID::t_InlineAsm: { PointerType *PTy = dyn_cast(Ty); - FunctionType *FTy = + FunctionType *FTy = PTy ? dyn_cast(PTy->getElementType()) : 0; if (!FTy || !InlineAsm::Verify(FTy, ID.StrVal2)) return Error(ID.Loc, "invalid type for inline asm constraint string"); @@ -2632,13 +2833,13 @@ bool LLParser::ConvertValIDToValue(Type *Ty, ValID &ID, Value *&V, "initializer with struct type has wrong # elements"); if (ST->isPacked() != (ID.Kind == ValID::t_PackedConstantStruct)) return Error(ID.Loc, "packed'ness of initializer and type don't match"); - + // Verify that the elements are compatible with the structtype. for (unsigned i = 0, e = ID.UIntVal; i != e; ++i) if (ID.ConstantStructElts[i]->getType() != ST->getElementType(i)) return Error(ID.Loc, "element " + Twine(i) + " of struct initializer doesn't match struct element type"); - + V = ConstantStruct::get(ST, makeArrayRef(ID.ConstantStructElts, ID.UIntVal)); } else @@ -2690,7 +2891,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (ParseOptionalLinkage(Linkage) || ParseOptionalVisibility(Visibility) || ParseOptionalCallingConv(CC) || - ParseOptionalAttrs(RetAttrs, 1) || + ParseOptionalReturnAttrs(RetAttrs) || ParseType(RetType, RetTypeLoc, true /*void allowed*/)) return true; @@ -2748,6 +2949,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { SmallVector ArgList; bool isVarArg; AttrBuilder FuncAttrs; + std::vector FwdRefAttrGrps; + LocTy NoBuiltinLoc; std::string Section; unsigned Alignment; std::string GC; @@ -2757,7 +2960,8 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (ParseArgumentList(ArgList, isVarArg) || ParseOptionalToken(lltok::kw_unnamed_addr, UnnamedAddr, &UnnamedAddrLoc) || - ParseOptionalAttrs(FuncAttrs, 2) || + ParseFnAttributeValuePairs(FuncAttrs, FwdRefAttrGrps, false, + NoBuiltinLoc) || (EatIfPresent(lltok::kw_section) && ParseStringConstant(Section)) || ParseOptionalAlignment(Alignment) || @@ -2765,39 +2969,41 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { ParseStringConstant(GC))) return true; + if (FuncAttrs.contains(Attribute::NoBuiltin)) + return Error(NoBuiltinLoc, "'nobuiltin' attribute not valid on function"); + // If the alignment was parsed as an attribute, move to the alignment field. if (FuncAttrs.hasAlignmentAttr()) { Alignment = FuncAttrs.getAlignment(); - FuncAttrs.removeAttribute(Attributes::Alignment); + FuncAttrs.removeAttribute(Attribute::Alignment); } // Okay, if we got here, the function is syntactically valid. Convert types // and do semantic checks. 
std::vector ParamTypeList; - SmallVector Attrs; + SmallVector Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttrListPtr::ReturnIndex, - Attributes::get(RetType->getContext(), - RetAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::ReturnIndex, + RetAttrs)); for (unsigned i = 0, e = ArgList.size(); i != e; ++i) { ParamTypeList.push_back(ArgList[i].Ty); - if (ArgList[i].Attrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + if (ArgList[i].Attrs.hasAttributes(i + 1)) { + AttrBuilder B(ArgList[i].Attrs, i + 1); + Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); + } } if (FuncAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttrListPtr::FunctionIndex, - Attributes::get(RetType->getContext(), - FuncAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::FunctionIndex, + FuncAttrs)); - AttrListPtr PAL = AttrListPtr::get(Context, Attrs); + AttributeSet PAL = AttributeSet::get(Context, Attrs); - if (PAL.getParamAttributes(1).hasAttribute(Attributes::StructRet) && - !RetType->isVoidTy()) + if (PAL.hasAttribute(1, Attribute::StructRet) && !RetType->isVoidTy()) return Error(RetTypeLoc, "functions with 'sret' argument must return void"); FunctionType *FT = @@ -2818,7 +3024,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { if (Fn->getType() != PFT) return Error(FRVI->second.second, "invalid forward reference to " "function '" + FunctionName + "' with wrong type!"); - + ForwardRefVals.erase(FRVI); } else if ((Fn = M->getFunction(FunctionName))) { // Reject redefinitions. @@ -2858,6 +3064,7 @@ bool LLParser::ParseFunctionHeader(Function *&Fn, bool isDefine) { Fn->setAlignment(Alignment); Fn->setSection(Section); if (!GC.empty()) Fn->setGC(GC.c_str()); + ForwardRefAttrGroups[Fn] = FwdRefAttrGrps; // Add all of the arguments we parsed to the function. Function::arg_iterator ArgIt = Fn->arg_begin(); @@ -2887,13 +3094,13 @@ bool LLParser::ParseFunctionBody(Function &Fn) { int FunctionNumber = -1; if (!Fn.hasName()) FunctionNumber = NumberedVals.size()-1; - + PerFunctionState PFS(*this, Fn, FunctionNumber); // We need at least one basic block. if (Lex.getKind() == lltok::rbrace) return TokError("function body requires at least one basic block"); - + while (Lex.getKind() != lltok::rbrace) if (ParseBasicBlock(PFS)) return true; @@ -2961,7 +3168,7 @@ bool LLParser::ParseBasicBlock(PerFunctionState &PFS) { // *must* be followed by metadata. if (ParseInstructionMetadata(Inst, &PFS)) return true; - break; + break; } // Set the name on the instruction. 
@@ -3004,16 +3211,26 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, bool NUW = EatIfPresent(lltok::kw_nuw); bool NSW = EatIfPresent(lltok::kw_nsw); if (!NUW) NUW = EatIfPresent(lltok::kw_nuw); - + if (ParseArithmetic(Inst, PFS, KeywordVal, 1)) return true; - + if (NUW) cast(Inst)->setHasNoUnsignedWrap(true); if (NSW) cast(Inst)->setHasNoSignedWrap(true); return false; } case lltok::kw_fadd: case lltok::kw_fsub: - case lltok::kw_fmul: return ParseArithmetic(Inst, PFS, KeywordVal, 2); + case lltok::kw_fmul: + case lltok::kw_fdiv: + case lltok::kw_frem: { + FastMathFlags FMF = EatFastMathFlagsIfPresent(); + int Res = ParseArithmetic(Inst, PFS, KeywordVal, 2); + if (Res != 0) + return Res; + if (FMF.any()) + Inst->setFastMathFlags(FMF); + return 0; + } case lltok::kw_sdiv: case lltok::kw_udiv: @@ -3028,8 +3245,6 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, case lltok::kw_urem: case lltok::kw_srem: return ParseArithmetic(Inst, PFS, KeywordVal, 1); - case lltok::kw_fdiv: - case lltok::kw_frem: return ParseArithmetic(Inst, PFS, KeywordVal, 2); case lltok::kw_and: case lltok::kw_or: case lltok::kw_xor: return ParseLogical(Inst, PFS, KeywordVal); @@ -3075,7 +3290,7 @@ int LLParser::ParseInstruction(Instruction *&Inst, BasicBlock *BB, bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) { if (Opc == Instruction::FCmp) { switch (Lex.getKind()) { - default: TokError("expected fcmp predicate (e.g. 'oeq')"); + default: return TokError("expected fcmp predicate (e.g. 'oeq')"); case lltok::kw_oeq: P = CmpInst::FCMP_OEQ; break; case lltok::kw_one: P = CmpInst::FCMP_ONE; break; case lltok::kw_olt: P = CmpInst::FCMP_OLT; break; @@ -3095,7 +3310,7 @@ bool LLParser::ParseCmpPredicate(unsigned &P, unsigned Opc) { } } else { switch (Lex.getKind()) { - default: TokError("expected icmp predicate (e.g. 'eq')"); + default: return TokError("expected icmp predicate (e.g. 'eq')"); case lltok::kw_eq: P = CmpInst::ICMP_EQ; break; case lltok::kw_ne: P = CmpInst::ICMP_NE; break; case lltok::kw_slt: P = CmpInst::ICMP_SLT; break; @@ -3126,12 +3341,12 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, if (ParseType(Ty, true /*void allowed*/)) return true; Type *ResType = PFS.getFunction().getReturnType(); - + if (Ty->isVoidTy()) { if (!ResType->isVoidTy()) return Error(TypeLoc, "value doesn't match function result type '" + getTypeString(ResType) + "'"); - + Inst = ReturnInst::Create(Context); return false; } @@ -3142,7 +3357,7 @@ bool LLParser::ParseRet(Instruction *&Inst, BasicBlock *BB, if (ResType != RV->getType()) return Error(TypeLoc, "value doesn't match function result type '" + getTypeString(ResType) + "'"); - + Inst = ReturnInst::Create(Context, RV); return false; } @@ -3204,7 +3419,7 @@ bool LLParser::ParseSwitch(Instruction *&Inst, PerFunctionState &PFS) { ParseToken(lltok::comma, "expected ',' after case value") || ParseTypeAndBasicBlock(DestBB, PFS)) return true; - + if (!SeenCases.insert(Constant)) return Error(CondLoc, "duplicate case value in switch"); if (!isa(Constant)) @@ -3232,26 +3447,26 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { ParseToken(lltok::comma, "expected ',' after indirectbr address") || ParseToken(lltok::lsquare, "expected '[' with indirectbr")) return true; - + if (!Address->getType()->isPointerTy()) return Error(AddrLoc, "indirectbr address must have pointer type"); - + // Parse the destination list. 
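// --- Illustrative sketch (not part of the patch): how the FastMathFlags
// collected by the new EatFastMathFlagsIfPresent() helper (declared later in
// LLParser.h) end up on a floating-point instruction. Only API names that
// appear in this patch are used; the flag selection is an example.
#include "llvm/IR/Instruction.h"
#include "llvm/IR/Operator.h"

static void applyExampleFlags(llvm::Instruction *FPOp, bool Fast, bool NNaN) {
  llvm::FastMathFlags FMF;
  if (Fast) FMF.setUnsafeAlgebra();   // textual keyword 'fast'
  if (NNaN) FMF.setNoNaNs();          // textual keyword 'nnan'
  if (FMF.any())                      // leave the instruction untouched otherwise
    FPOp->setFastMathFlags(FMF);
}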
SmallVector DestList; - + if (Lex.getKind() != lltok::rsquare) { BasicBlock *DestBB; if (ParseTypeAndBasicBlock(DestBB, PFS)) return true; DestList.push_back(DestBB); - + while (EatIfPresent(lltok::comma)) { if (ParseTypeAndBasicBlock(DestBB, PFS)) return true; DestList.push_back(DestBB); } } - + if (ParseToken(lltok::rsquare, "expected ']' at end of block list")) return true; @@ -3269,6 +3484,8 @@ bool LLParser::ParseIndirectBr(Instruction *&Inst, PerFunctionState &PFS) { bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { LocTy CallLoc = Lex.getLoc(); AttrBuilder RetAttrs, FnAttrs; + std::vector FwdRefAttrGrps; + LocTy NoBuiltinLoc; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3277,11 +3494,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { BasicBlock *NormalBB, *UnwindBB; if (ParseOptionalCallingConv(CC) || - ParseOptionalAttrs(RetAttrs, 1) || + ParseOptionalReturnAttrs(RetAttrs) || ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseOptionalAttrs(FnAttrs, 2) || + ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, + NoBuiltinLoc) || ParseToken(lltok::kw_to, "expected 'to' in invoke") || ParseTypeAndBasicBlock(NormalBB, PFS) || ParseToken(lltok::kw_unwind, "expected 'unwind' in invoke") || @@ -3311,13 +3529,12 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { Value *Callee; if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; - // Set up the Attributes for the function. - SmallVector Attrs; + // Set up the Attribute for the function. + SmallVector Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttrListPtr::ReturnIndex, - Attributes::get(Callee->getContext(), - RetAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::ReturnIndex, + RetAttrs)); SmallVector Args; @@ -3337,25 +3554,27 @@ bool LLParser::ParseInvoke(Instruction *&Inst, PerFunctionState &PFS) { return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + if (ArgList[i].Attrs.hasAttributes(i + 1)) { + AttrBuilder B(ArgList[i].Attrs, i + 1); + Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); + } } if (I != E) return Error(CallLoc, "not enough parameters specified for call"); if (FnAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttrListPtr::FunctionIndex, - Attributes::get(Callee->getContext(), - FnAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::FunctionIndex, + FnAttrs)); - // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Context, Attrs); + // Finish off the Attribute and check them + AttributeSet PAL = AttributeSet::get(Context, Attrs); InvokeInst *II = InvokeInst::Create(Callee, NormalBB, UnwindBB, Args); II->setCallingConv(CC); II->setAttributes(PAL); + ForwardRefAttrGroups[II] = FwdRefAttrGrps; Inst = II; return false; } @@ -3674,6 +3893,8 @@ bool LLParser::ParseLandingPad(Instruction *&Inst, PerFunctionState &PFS) { bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, bool isTail) { AttrBuilder RetAttrs, FnAttrs; + std::vector FwdRefAttrGrps; + LocTy NoBuiltinLoc; CallingConv::ID CC; Type *RetType = 0; LocTy RetTypeLoc; @@ -3683,11 +3904,12 @@ bool LLParser::ParseCall(Instruction 
*&Inst, PerFunctionState &PFS, if ((isTail && ParseToken(lltok::kw_call, "expected 'tail call'")) || ParseOptionalCallingConv(CC) || - ParseOptionalAttrs(RetAttrs, 1) || + ParseOptionalReturnAttrs(RetAttrs) || ParseType(RetType, RetTypeLoc, true /*void allowed*/) || ParseValID(CalleeID) || ParseParameterList(ArgList, PFS) || - ParseOptionalAttrs(FnAttrs, 2)) + ParseFnAttributeValuePairs(FnAttrs, FwdRefAttrGrps, false, + NoBuiltinLoc)) return true; // If RetType is a non-function pointer type, then this is the short syntax @@ -3713,13 +3935,12 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, Value *Callee; if (ConvertValIDToValue(PFTy, CalleeID, Callee, &PFS)) return true; - // Set up the Attributes for the function. - SmallVector Attrs; + // Set up the Attribute for the function. + SmallVector Attrs; if (RetAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttrListPtr::ReturnIndex, - Attributes::get(Callee->getContext(), - RetAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::ReturnIndex, + RetAttrs)); SmallVector Args; @@ -3739,26 +3960,28 @@ bool LLParser::ParseCall(Instruction *&Inst, PerFunctionState &PFS, return Error(ArgList[i].Loc, "argument is not of expected type '" + getTypeString(ExpectedTy) + "'"); Args.push_back(ArgList[i].V); - if (ArgList[i].Attrs.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(i+1, ArgList[i].Attrs)); + if (ArgList[i].Attrs.hasAttributes(i + 1)) { + AttrBuilder B(ArgList[i].Attrs, i + 1); + Attrs.push_back(AttributeSet::get(RetType->getContext(), i + 1, B)); + } } if (I != E) return Error(CallLoc, "not enough parameters specified for call"); if (FnAttrs.hasAttributes()) - Attrs.push_back( - AttributeWithIndex::get(AttrListPtr::FunctionIndex, - Attributes::get(Callee->getContext(), - FnAttrs))); + Attrs.push_back(AttributeSet::get(RetType->getContext(), + AttributeSet::FunctionIndex, + FnAttrs)); - // Finish off the Attributes and check them - AttrListPtr PAL = AttrListPtr::get(Context, Attrs); + // Finish off the Attribute and check them + AttributeSet PAL = AttributeSet::get(Context, Attrs); CallInst *CI = CallInst::Create(Callee, Args); CI->setTailCall(isTail); CI->setCallingConv(CC); CI->setAttributes(PAL); + ForwardRefAttrGroups[CI] = FwdRefAttrGrps; Inst = CI; return false; } @@ -3798,7 +4021,7 @@ int LLParser::ParseAlloc(Instruction *&Inst, PerFunctionState &PFS) { /// ParseLoad /// ::= 'load' 'volatile'? TypeAndValue (',' 'align' i32)? -/// ::= 'load' 'atomic' 'volatile'? TypeAndValue +/// ::= 'load' 'atomic' 'volatile'? TypeAndValue /// 'singlethread'? AtomicOrdering (',' 'align' i32)? 
int LLParser::ParseLoad(Instruction *&Inst, PerFunctionState &PFS) { Value *Val; LocTy Loc; @@ -4034,9 +4257,6 @@ int LLParser::ParseGetElementPtr(Instruction *&Inst, PerFunctionState &PFS) { Indices.push_back(Val); } - if (Val && Val->getType()->isVectorTy() && Indices.size() != 1) - return Error(EltLoc, "vector getelementptrs must have a single index"); - if (!GetElementPtrInst::getIndexedType(Ptr->getType(), Indices)) return Error(Loc, "invalid getelementptr indices"); Inst = GetElementPtrInst::Create(Ptr, Indices); @@ -4075,7 +4295,7 @@ int LLParser::ParseInsertValue(Instruction *&Inst, PerFunctionState &PFS) { ParseTypeAndValue(Val1, Loc1, PFS) || ParseIndexList(Indices, AteExtraComma)) return true; - + if (!Val0->getType()->isAggregateType()) return Error(Loc0, "insertvalue operand must be aggregate type"); @@ -4105,7 +4325,7 @@ bool LLParser::ParseMDNodeVector(SmallVectorImpl &Elts, Elts.push_back(0); continue; } - + Value *V = 0; if (ParseTypeAndValue(V, PFS)) return true; Elts.push_back(V); diff --git a/lib/AsmParser/LLParser.h b/lib/AsmParser/LLParser.h index c6bbdb27aeef..1f2879e948d9 100644 --- a/lib/AsmParser/LLParser.h +++ b/lib/AsmParser/LLParser.h @@ -15,12 +15,13 @@ #define LLVM_ASMPARSER_LLPARSER_H #include "LLLexer.h" -#include "llvm/Attributes.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Type.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/StringMap.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ValueHandle.h" #include @@ -55,7 +56,7 @@ namespace llvm { t_ConstantStruct, // Value in ConstantStructElts. t_PackedConstantStruct // Value in ConstantStructElts. } Kind; - + LLLexer::LocTy Loc; unsigned UIntVal; std::string StrVal, StrVal2; @@ -65,23 +66,23 @@ namespace llvm { MDNode *MDNodeVal; MDString *MDStringVal; Constant **ConstantStructElts; - + ValID() : Kind(t_LocalID), APFloatVal(0.0) {} ~ValID() { if (Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) delete [] ConstantStructElts; } - + bool operator<(const ValID &RHS) const { if (Kind == t_LocalID || Kind == t_GlobalID) return UIntVal < RHS.UIntVal; assert((Kind == t_LocalName || Kind == t_GlobalName || - Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) && + Kind == t_ConstantStruct || Kind == t_PackedConstantStruct) && "Ordering not defined for this ValID kind yet"); return StrVal < RHS.StrVal; } }; - + class LLParser { public: typedef LLLexer::LocTy LocTy; @@ -89,7 +90,7 @@ namespace llvm { LLVMContext &Context; LLLexer Lex; Module *M; - + // Instruction metadata resolution. Each instruction can have a list of // MDRef info associated with them. // @@ -110,7 +111,7 @@ namespace llvm { // have processed a use of the type but not a definition yet. StringMap > NamedTypes; std::vector > NumberedTypes; - + std::vector > NumberedMetadata; std::map, LocTy> > ForwardRefMDNodes; @@ -118,14 +119,18 @@ namespace llvm { std::map > ForwardRefVals; std::map > ForwardRefValIDs; std::vector NumberedVals; - + // References to blockaddress. The key is the function ValID, the value is // a list of references to blocks in that function. std::map > > ForwardRefBlockAddresses; - + + // Attribute builder reference information. 
+ std::map > ForwardRefAttrGroups; + std::map NumberedAttrBuilders; + public: - LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : + LLParser(MemoryBuffer *F, SourceMgr &SM, SMDiagnostic &Err, Module *m) : Context(m->getContext()), Lex(F, SM, Err, m->getContext()), M(m) {} bool Run(); @@ -154,6 +159,21 @@ namespace llvm { Lex.Lex(); return true; } + + FastMathFlags EatFastMathFlagsIfPresent() { + FastMathFlags FMF; + while (true) + switch (Lex.getKind()) { + case lltok::kw_fast: FMF.setUnsafeAlgebra(); Lex.Lex(); continue; + case lltok::kw_nnan: FMF.setNoNaNs(); Lex.Lex(); continue; + case lltok::kw_ninf: FMF.setNoInfs(); Lex.Lex(); continue; + case lltok::kw_nsz: FMF.setNoSignedZeros(); Lex.Lex(); continue; + case lltok::kw_arcp: FMF.setAllowReciprocal(); Lex.Lex(); continue; + default: return FMF; + } + return FMF; + } + bool ParseOptionalToken(lltok::Kind T, bool &Present, LocTy *Loc = 0) { if (Lex.getKind() != T) { Present = false; @@ -175,7 +195,8 @@ namespace llvm { bool ParseTLSModel(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalThreadLocal(GlobalVariable::ThreadLocalMode &TLM); bool ParseOptionalAddrSpace(unsigned &AddrSpace); - bool ParseOptionalAttrs(AttrBuilder &Attrs, unsigned AttrKind); + bool ParseOptionalParamAttrs(AttrBuilder &B); + bool ParseOptionalReturnAttrs(AttrBuilder &B); bool ParseOptionalLinkage(unsigned &Linkage, bool &HasLinkage); bool ParseOptionalLinkage(unsigned &Linkage) { bool HasLinkage; return ParseOptionalLinkage(Linkage, HasLinkage); @@ -200,8 +221,8 @@ namespace llvm { bool ParseTopLevelEntities(); bool ValidateEndOfModule(); bool ParseTargetDefinition(); - bool ParseDepLibs(); bool ParseModuleAsm(); + bool ParseDepLibs(); // FIXME: Remove in 4.0. bool ParseUnnamedType(); bool ParseNamedType(); bool ParseDeclare(); @@ -218,6 +239,10 @@ namespace llvm { bool ParseMDString(MDString *&Result); bool ParseMDNodeID(MDNode *&Result); bool ParseMDNodeID(MDNode *&Result, unsigned &SlotNo); + bool ParseUnnamedAttrGrp(); + bool ParseFnAttributeValuePairs(AttrBuilder &B, + std::vector &FwdRefAttrGrps, + bool inAttrGrp, LocTy &NoBuiltinLoc); // Type Parsing. bool ParseType(Type *&Result, bool AllowVoid = false); @@ -241,7 +266,7 @@ namespace llvm { std::map > ForwardRefVals; std::map > ForwardRefValIDs; std::vector NumberedVals; - + /// FunctionNumber - If this is an unnamed function, this is the slot /// number of it, otherwise it is -1. 
int FunctionNumber; @@ -308,8 +333,8 @@ namespace llvm { struct ParamInfo { LocTy Loc; Value *V; - Attributes Attrs; - ParamInfo(LocTy loc, Value *v, Attributes attrs) + AttributeSet Attrs; + ParamInfo(LocTy loc, Value *v, AttributeSet attrs) : Loc(loc), V(v), Attrs(attrs) {} }; bool ParseParameterList(SmallVectorImpl &ArgList, @@ -329,9 +354,9 @@ namespace llvm { struct ArgInfo { LocTy Loc; Type *Ty; - Attributes Attrs; + AttributeSet Attrs; std::string Name; - ArgInfo(LocTy L, Type *ty, Attributes Attr, const std::string &N) + ArgInfo(LocTy L, Type *ty, AttributeSet Attr, const std::string &N) : Loc(L), Ty(ty), Attrs(Attr), Name(N) {} }; bool ParseArgumentList(SmallVectorImpl &ArgList, bool &isVarArg); @@ -375,8 +400,8 @@ namespace llvm { int ParseGetElementPtr(Instruction *&I, PerFunctionState &PFS); int ParseExtractValue(Instruction *&I, PerFunctionState &PFS); int ParseInsertValue(Instruction *&I, PerFunctionState &PFS); - - bool ResolveForwardRefBlockAddresses(Function *TheFn, + + bool ResolveForwardRefBlockAddresses(Function *TheFn, std::vector > &Refs, PerFunctionState *PFS); }; diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 036686d31823..cd25ba30008f 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -30,6 +30,7 @@ namespace lltok { lparen, rparen, // ( ) backslash, // \ (not /) exclaim, // ! + hash, // # kw_x, kw_true, kw_false, @@ -44,6 +45,7 @@ namespace lltok { kw_dllimport, kw_dllexport, kw_common, kw_available_externally, kw_default, kw_hidden, kw_protected, kw_unnamed_addr, + kw_externally_initialized, kw_extern_weak, kw_external, kw_thread_local, kw_localdynamic, kw_initialexec, kw_localexec, @@ -54,12 +56,17 @@ namespace lltok { kw_target, kw_triple, kw_unwind, - kw_deplibs, + kw_deplibs, // FIXME: Remove in 4.0 kw_datalayout, kw_volatile, kw_atomic, kw_unordered, kw_monotonic, kw_acquire, kw_release, kw_acq_rel, kw_seq_cst, kw_singlethread, + kw_nnan, + kw_ninf, + kw_nsz, + kw_arcp, + kw_fast, kw_nuw, kw_nsw, kw_exact, @@ -84,33 +91,39 @@ namespace lltok { kw_ptx_kernel, kw_ptx_device, kw_spir_kernel, kw_spir_func, - kw_signext, - kw_zeroext, + // Attributes: + kw_attributes, + kw_alwaysinline, + kw_sanitize_address, + kw_byval, + kw_inlinehint, kw_inreg, - kw_sret, - kw_nounwind, - kw_noreturn, + kw_minsize, + kw_naked, + kw_nest, kw_noalias, + kw_nobuiltin, kw_nocapture, - kw_byval, - kw_nest, + kw_noduplicate, + kw_noimplicitfloat, + kw_noinline, + kw_nonlazybind, + kw_noredzone, + kw_noreturn, + kw_nounwind, + kw_optsize, kw_readnone, kw_readonly, - kw_uwtable, kw_returns_twice, - - kw_inlinehint, - kw_noinline, - kw_alwaysinline, - kw_optsize, + kw_signext, kw_ssp, kw_sspreq, - kw_noredzone, - kw_noimplicitfloat, - kw_naked, - kw_nonlazybind, - kw_address_safety, - kw_minsize, + kw_sspstrong, + kw_sret, + kw_sanitize_thread, + kw_sanitize_memory, + kw_uwtable, + kw_zeroext, kw_type, kw_opaque, @@ -147,6 +160,7 @@ namespace lltok { // Unsigned Valued tokens (UIntVal). GlobalID, // @42 LocalVarID, // %42 + AttrGrpID, // #42 // String valued tokens (StrVal). 
LabelStr, // foo: diff --git a/lib/AsmParser/Parser.cpp b/lib/AsmParser/Parser.cpp index 21b7fd411e3d..bb4f03bacc17 100644 --- a/lib/AsmParser/Parser.cpp +++ b/lib/AsmParser/Parser.cpp @@ -13,10 +13,10 @@ #include "llvm/Assembly/Parser.h" #include "LLParser.h" -#include "llvm/Module.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Support/SourceMgr.h" +#include "llvm/IR/Module.h" #include "llvm/Support/MemoryBuffer.h" +#include "llvm/Support/SourceMgr.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/system_error.h" #include diff --git a/lib/Bitcode/Reader/BitReader.cpp b/lib/Bitcode/Reader/BitReader.cpp index 15844c0041c3..5cd6c552bd8a 100644 --- a/lib/Bitcode/Reader/BitReader.cpp +++ b/lib/Bitcode/Reader/BitReader.cpp @@ -9,10 +9,10 @@ #include "llvm-c/BitReader.h" #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/LLVMContext.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/MemoryBuffer.h" -#include #include +#include using namespace llvm; @@ -30,7 +30,7 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, LLVMModuleRef *OutModule, char **OutMessage) { std::string Message; - + *OutModule = wrap(ParseBitcodeFile(unwrap(MemBuf), *unwrap(ContextRef), &Message)); if (!*OutModule) { @@ -38,19 +38,19 @@ LLVMBool LLVMParseBitcodeInContext(LLVMContextRef ContextRef, *OutMessage = strdup(Message.c_str()); return 1; } - + return 0; } /* Reads a module from the specified path, returning via the OutModule parameter a module provider which performs lazy deserialization. Returns 0 on success. - Optionally returns a human-readable error message via OutMessage. */ + Optionally returns a human-readable error message via OutMessage. */ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, LLVMMemoryBufferRef MemBuf, LLVMModuleRef *OutM, char **OutMessage) { std::string Message; - + *OutM = wrap(getLazyBitcodeModule(unwrap(MemBuf), *unwrap(ContextRef), &Message)); if (!*OutM) { @@ -58,7 +58,7 @@ LLVMBool LLVMGetBitcodeModuleInContext(LLVMContextRef ContextRef, *OutMessage = strdup(Message.c_str()); return 1; } - + return 0; } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 4ec9da12ddcf..f34884391a74 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -6,26 +6,22 @@ // License. See LICENSE.TXT for details. // //===----------------------------------------------------------------------===// -// -// This header defines the BitcodeReader class. 
-// -//===----------------------------------------------------------------------===// #include "llvm/Bitcode/ReaderWriter.h" #include "BitcodeReader.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/InlineAsm.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" -#include "llvm/AutoUpgrade.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/AutoUpgrade.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/DataStream.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/MemoryBuffer.h" -#include "llvm/OperandTraits.h" using namespace llvm; enum { @@ -47,7 +43,7 @@ void BitcodeReader::FreeState() { ValueList.clear(); MDValueList.clear(); - std::vector().swap(MAttributes); + std::vector().swap(MAttributes); std::vector().swap(FunctionBBs); std::vector().swap(FunctionsWithBodies); DeferredFunctionInfo.clear(); @@ -432,6 +428,26 @@ Type *BitcodeReader::getTypeByID(unsigned ID) { // Functions for parsing blocks from the bitcode file //===----------------------------------------------------------------------===// + +/// \brief This fills an AttrBuilder object with the LLVM attributes that have +/// been decoded from the given integer. This function must stay in sync with +/// 'encodeLLVMAttributesForBitcode'. +static void decodeLLVMAttributesForBitcode(AttrBuilder &B, + uint64_t EncodedAttrs) { + // FIXME: Remove in 4.0. + + // The alignment is stored as a 16-bit raw value from bits 31--16. We shift + // the bits above 31 down by 11 bits. + unsigned Alignment = (EncodedAttrs & (0xffffULL << 16)) >> 16; + assert((!Alignment || isPowerOf2_32(Alignment)) && + "Alignment must be a power of two."); + + if (Alignment) + B.addAlignmentAttr(Alignment); + B.addRawValue(((EncodedAttrs & (0xfffffULL << 32)) >> 11) | + (EncodedAttrs & 0xffff)); +} + bool BitcodeReader::ParseAttributeBlock() { if (Stream.EnterSubBlock(bitc::PARAMATTR_BLOCK_ID)) return Error("Malformed block record"); @@ -441,54 +457,124 @@ bool BitcodeReader::ParseAttributeBlock() { SmallVector Record; - SmallVector Attrs; + SmallVector Attrs; // Read all the records. while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of PARAMATTR block"); + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("Error at end of PARAMATTR block"); + case BitstreamEntry::EndBlock: return false; + case BitstreamEntry::Record: + // The interesting case. + break; } - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; + // Read a record. + Record.clear(); + switch (Stream.readRecord(Entry.ID, Record)) { + default: // Default behavior: ignore. + break; + case bitc::PARAMATTR_CODE_ENTRY_OLD: { // ENTRY: [paramidx0, attr0, ...] + // FIXME: Remove in 4.0. 
+ if (Record.size() & 1) + return Error("Invalid ENTRY record"); + + for (unsigned i = 0, e = Record.size(); i != e; i += 2) { + AttrBuilder B; + decodeLLVMAttributesForBitcode(B, Record[i+1]); + Attrs.push_back(AttributeSet::get(Context, Record[i], B)); + } + + MAttributes.push_back(AttributeSet::get(Context, Attrs)); + Attrs.clear(); + break; } + case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [attrgrp0, attrgrp1, ...] + for (unsigned i = 0, e = Record.size(); i != e; ++i) + Attrs.push_back(MAttributeGroups[Record[i]]); - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + MAttributes.push_back(AttributeSet::get(Context, Attrs)); + Attrs.clear(); + break; + } + } + } +} + +bool BitcodeReader::ParseAttributeGroupBlock() { + if (Stream.EnterSubBlock(bitc::PARAMATTR_GROUP_BLOCK_ID)) + return Error("Malformed block record"); + + if (!MAttributeGroups.empty()) + return Error("Multiple PARAMATTR_GROUP blocks found!"); + + SmallVector Record; + + // Read all the records. + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("Error at end of PARAMATTR_GROUP block"); + case BitstreamEntry::EndBlock: + return false; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; - case bitc::PARAMATTR_CODE_ENTRY: { // ENTRY: [paramidx0, attr0, ...] - if (Record.size() & 1) + case bitc::PARAMATTR_GRP_CODE_ENTRY: { // ENTRY: [grpid, idx, a0, a1, ...] + if (Record.size() < 3) return Error("Invalid ENTRY record"); - for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - Attributes ReconstitutedAttr = - Attributes::decodeLLVMAttributesForBitcode(Context, Record[i+1]); - Record[i+1] = ReconstitutedAttr.Raw(); - } + uint64_t GrpID = Record[0]; + uint64_t Idx = Record[1]; // Index of the object this attribute refers to. - for (unsigned i = 0, e = Record.size(); i != e; i += 2) { - AttrBuilder B(Record[i+1]); - if (B.hasAttributes()) - Attrs.push_back(AttributeWithIndex::get(Record[i], - Attributes::get(Context, B))); + AttrBuilder B; + for (unsigned i = 2, e = Record.size(); i != e; ++i) { + if (Record[i] == 0) { // Enum attribute + B.addAttribute(Attribute::AttrKind(Record[++i])); + } else if (Record[i] == 1) { // Align attribute + if (Attribute::AttrKind(Record[++i]) == Attribute::Alignment) + B.addAlignmentAttr(Record[++i]); + else + B.addStackAlignmentAttr(Record[++i]); + } else { // String attribute + assert((Record[i] == 3 || Record[i] == 4) && + "Invalid attribute group entry"); + bool HasValue = (Record[i++] == 4); + SmallString<64> KindStr; + SmallString<64> ValStr; + + while (Record[i] != 0 && i != e) + KindStr += Record[i++]; + assert(Record[i] == 0 && "Kind string not null terminated"); + + if (HasValue) { + // Has a value associated with it. + ++i; // Skip the '0' that terminates the "kind" string. + while (Record[i] != 0 && i != e) + ValStr += Record[i++]; + assert(Record[i] == 0 && "Value string not null terminated"); + } + + B.addAttribute(KindStr.str(), ValStr.str()); + } } - MAttributes.push_back(AttrListPtr::get(Context, Attrs)); - Attrs.clear(); + MAttributeGroups[GrpID] = AttributeSet::get(Context, Idx, B); break; } } @@ -513,32 +599,26 @@ bool BitcodeReader::ParseTypeTableBody() { // Read all the records for this type table. 
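// --- Illustrative sketch (not part of the patch): assembling one attribute
// group the way ParseAttributeGroupBlock above decodes its records -- an enum
// attribute, an alignment attribute, and a string key/value pair -- keyed by
// the index the group applies to. The string key and values are made up for
// the example.
#include "llvm/IR/Attributes.h"
#include "llvm/IR/LLVMContext.h"

static llvm::AttributeSet buildExampleGroup(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  AttrBuilder B;
  B.addAttribute(Attribute::NoUnwind);      // record kind 0: enum attribute
  B.addAlignmentAttr(16);                   // record kind 1 with Attribute::Alignment
  B.addAttribute("example-key", "true");    // record kinds 3/4: string attribute
  // Idx is what the group attaches to (e.g. FunctionIndex for function groups).
  return AttributeSet::get(Ctx, AttributeSet::FunctionIndex, B);
}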
while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + Error("Error in the type table block"); + return true; + case BitstreamEntry::EndBlock: if (NumRecords != TypeList.size()) return Error("Invalid type forward reference in TYPE_BLOCK"); - if (Stream.ReadBlockEnd()) - return Error("Error at end of type table block"); return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. Record.clear(); Type *ResultTy = 0; - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: return Error("unknown type in type table"); case bitc::TYPE_CODE_NUMENTRY: // TYPE_CODE_NUMENTRY: [numentries] // TYPE_CODE_NUMENTRY contains a count of the number of types in the @@ -607,7 +687,7 @@ bool BitcodeReader::ParseTypeTableBody() { else break; } - + ResultTy = getTypeByID(Record[2]); if (ResultTy == 0 || ArgTys.size() < Record.size()-3) return Error("invalid type in function type"); @@ -626,7 +706,7 @@ bool BitcodeReader::ParseTypeTableBody() { else break; } - + ResultTy = getTypeByID(Record[1]); if (ResultTy == 0 || ArgTys.size() < Record.size()-2) return Error("invalid type in function type"); @@ -657,10 +737,10 @@ bool BitcodeReader::ParseTypeTableBody() { case bitc::TYPE_CODE_STRUCT_NAMED: { // STRUCT: [ispacked, eltty x N] if (Record.size() < 1) return Error("Invalid STRUCT type record"); - + if (NumRecords >= TypeList.size()) return Error("invalid TYPE table"); - + // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { @@ -669,7 +749,7 @@ bool BitcodeReader::ParseTypeTableBody() { } else // Otherwise, create a new struct. Res = StructType::create(Context, TypeName); TypeName.clear(); - + SmallVector EltTys; for (unsigned i = 1, e = Record.size(); i != e; ++i) { if (Type *T = getTypeByID(Record[i])) @@ -689,7 +769,7 @@ bool BitcodeReader::ParseTypeTableBody() { if (NumRecords >= TypeList.size()) return Error("invalid TYPE table"); - + // Check to see if this was forward referenced, if so fill in the temp. StructType *Res = cast_or_null(TypeList[NumRecords]); if (Res) { @@ -700,7 +780,7 @@ bool BitcodeReader::ParseTypeTableBody() { TypeName.clear(); ResultTy = Res; break; - } + } case bitc::TYPE_CODE_ARRAY: // ARRAY: [numelts, eltty] if (Record.size() < 2) return Error("Invalid ARRAY type record"); @@ -736,28 +816,22 @@ bool BitcodeReader::ParseValueSymbolTable() { // Read all the records for this value table. SmallString<128> ValueName; while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of value symbol table block"); - return false; - } - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. 
- Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed value symbol table block"); + case BitstreamEntry::EndBlock: + return false; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; case bitc::VST_CODE_ENTRY: { // VST_ENTRY: [valueid, namechar x N] @@ -797,41 +871,35 @@ bool BitcodeReader::ParseMetadata() { // Read all the records. while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of PARAMATTR block"); - return false; - } + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + Error("malformed metadata block"); + return true; + case BitstreamEntry::EndBlock: + return false; + case BitstreamEntry::Record: + // The interesting case. + break; } bool IsFunctionLocal = false; // Read a record. Record.clear(); - Code = Stream.ReadRecord(Code, Record); + unsigned Code = Stream.readRecord(Entry.ID, Record); switch (Code) { default: // Default behavior: ignore. break; case bitc::METADATA_NAME: { - // Read named of the named metadata. + // Read name of the named metadata. SmallString<8> Name(Record.begin(), Record.end()); Record.clear(); Code = Stream.ReadCode(); // METADATA_NAME is always followed by METADATA_NAMED_NODE. - unsigned NextBitCode = Stream.ReadRecord(Code, Record); + unsigned NextBitCode = Stream.readRecord(Code, Record); assert(NextBitCode == bitc::METADATA_NAMED_NODE); (void)NextBitCode; // Read named metadata elements. @@ -958,27 +1026,29 @@ bool BitcodeReader::ParseConstants() { Type *CurTy = Type::getInt32Ty(Context); unsigned NextCstNo = ValueList.size(); while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed block record in AST file"); + case BitstreamEntry::EndBlock: + if (NextCstNo != ValueList.size()) + return Error("Invalid constant reference!"); + + // Once all the constants have been read, go through and resolve forward + // references. + ValueList.ResolveConstantForwardRefs(); + return false; + case BitstreamEntry::Record: + // The interesting case. break; - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; } // Read a record. 
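// --- Illustrative sketch (not part of the patch): the record-reading loop
// that the hunks in this file repeatedly converge on, using the new
// BitstreamCursor advanceSkippingSubblocks()/readRecord() interface. The
// per-record handling is elided.
#include "llvm/ADT/SmallVector.h"
#include "llvm/Bitcode/BitstreamReader.h"

static bool readBlockRecords(llvm::BitstreamCursor &Stream) {
  llvm::SmallVector<uint64_t, 64> Record;
  while (1) {
    llvm::BitstreamEntry Entry = Stream.advanceSkippingSubblocks();
    switch (Entry.Kind) {
    case llvm::BitstreamEntry::SubBlock:   // already skipped for us
    case llvm::BitstreamEntry::Error:
      return true;                         // treat as a malformed block
    case llvm::BitstreamEntry::EndBlock:
      return false;                        // clean end of the block
    case llvm::BitstreamEntry::Record:
      break;                               // the interesting case
    }
    Record.clear();
    unsigned Code = Stream.readRecord(Entry.ID, Record);
    (void)Code;                            // a real reader switches on Code here
  }
}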
Record.clear(); Value *V = 0; - unsigned BitCode = Stream.ReadRecord(Code, Record); + unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: unknown constant case bitc::CST_CODE_UNDEF: // UNDEF @@ -1006,28 +1076,34 @@ bool BitcodeReader::ParseConstants() { APInt VInt = ReadWideAPInt(Record, cast(CurTy)->getBitWidth()); V = ConstantInt::get(Context, VInt); - + break; } case bitc::CST_CODE_FLOAT: { // FLOAT: [fpval] if (Record.empty()) return Error("Invalid FLOAT record"); if (CurTy->isHalfTy()) - V = ConstantFP::get(Context, APFloat(APInt(16, (uint16_t)Record[0]))); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEhalf, + APInt(16, (uint16_t)Record[0]))); else if (CurTy->isFloatTy()) - V = ConstantFP::get(Context, APFloat(APInt(32, (uint32_t)Record[0]))); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEsingle, + APInt(32, (uint32_t)Record[0]))); else if (CurTy->isDoubleTy()) - V = ConstantFP::get(Context, APFloat(APInt(64, Record[0]))); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEdouble, + APInt(64, Record[0]))); else if (CurTy->isX86_FP80Ty()) { // Bits are not stored the same way as a normal i80 APInt, compensate. uint64_t Rearrange[2]; Rearrange[0] = (Record[1] & 0xffffLL) | (Record[0] << 16); Rearrange[1] = Record[0] >> 48; - V = ConstantFP::get(Context, APFloat(APInt(80, Rearrange))); + V = ConstantFP::get(Context, APFloat(APFloat::x87DoubleExtended, + APInt(80, Rearrange))); } else if (CurTy->isFP128Ty()) - V = ConstantFP::get(Context, APFloat(APInt(128, Record), true)); + V = ConstantFP::get(Context, APFloat(APFloat::IEEEquad, + APInt(128, Record))); else if (CurTy->isPPC_FP128Ty()) - V = ConstantFP::get(Context, APFloat(APInt(128, Record))); + V = ConstantFP::get(Context, APFloat(APFloat::PPCDoubleDouble, + APInt(128, Record))); else V = UndefValue::get(CurTy); break; @@ -1073,10 +1149,10 @@ bool BitcodeReader::ParseConstants() { case bitc::CST_CODE_DATA: {// DATA: [n x value] if (Record.empty()) return Error("Invalid CST_DATA record"); - + Type *EltTy = cast(CurTy)->getElementType(); unsigned Size = Record.size(); - + if (EltTy->isIntegerTy(8)) { SmallVector Elts(Record.begin(), Record.end()); if (isa(CurTy)) @@ -1182,10 +1258,11 @@ bool BitcodeReader::ParseConstants() { } case bitc::CST_CODE_CE_SELECT: // CE_SELECT: [opval#, opval#, opval#] if (Record.size() < 3) return Error("Invalid CE_SELECT record"); - V = ConstantExpr::getSelect(ValueList.getConstantFwdRef(Record[0], - Type::getInt1Ty(Context)), - ValueList.getConstantFwdRef(Record[1],CurTy), - ValueList.getConstantFwdRef(Record[2],CurTy)); + V = ConstantExpr::getSelect( + ValueList.getConstantFwdRef(Record[0], + Type::getInt1Ty(Context)), + ValueList.getConstantFwdRef(Record[1],CurTy), + ValueList.getConstantFwdRef(Record[2],CurTy)); break; case bitc::CST_CODE_CE_EXTRACTELT: { // CE_EXTRACTELT: [opty, opval, opval] if (Record.size() < 3) return Error("Invalid CE_EXTRACTELT record"); @@ -1193,7 +1270,8 @@ bool BitcodeReader::ParseConstants() { dyn_cast_or_null(getTypeByID(Record[0])); if (OpTy == 0) return Error("Invalid CE_EXTRACTELT record"); Constant *Op0 = ValueList.getConstantFwdRef(Record[1], OpTy); - Constant *Op1 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + Constant *Op1 = ValueList.getConstantFwdRef(Record[2], + Type::getInt32Ty(Context)); V = ConstantExpr::getExtractElement(Op0, Op1); break; } @@ -1204,7 +1282,8 @@ bool BitcodeReader::ParseConstants() { Constant *Op0 = ValueList.getConstantFwdRef(Record[0], OpTy); Constant 
*Op1 = ValueList.getConstantFwdRef(Record[1], OpTy->getElementType()); - Constant *Op2 = ValueList.getConstantFwdRef(Record[2], Type::getInt32Ty(Context)); + Constant *Op2 = ValueList.getConstantFwdRef(Record[2], + Type::getInt32Ty(Context)); V = ConstantExpr::getInsertElement(Op0, Op1, Op2); break; } @@ -1324,23 +1403,12 @@ bool BitcodeReader::ParseConstants() { V = FwdRef; } break; - } + } } ValueList.AssignValue(V, NextCstNo); ++NextCstNo; } - - if (NextCstNo != ValueList.size()) - return Error("Invalid constant reference!"); - - if (Stream.ReadBlockEnd()) - return Error("Error at end of constants block"); - - // Once all the constants have been read, go through and resolve forward - // references. - ValueList.ResolveConstantForwardRefs(); - return false; } bool BitcodeReader::ParseUseLists() { @@ -1348,32 +1416,25 @@ bool BitcodeReader::ParseUseLists() { return Error("Malformed block record"); SmallVector Record; - + // Read all the records. while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of use-list table block"); + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed use list block"); + case BitstreamEntry::EndBlock: return false; + case BitstreamEntry::Record: + // The interesting case. + break; } - - if (Code == bitc::ENTER_SUBBLOCK) { - // No known subblocks, always skip them. - Stream.ReadSubBlockID(); - if (Stream.SkipBlock()) - return Error("Malformed block record"); - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } - + // Read a use list record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: unknown type. break; case bitc::USELIST_CODE_ENTRY: { // USELIST_CODE_ENTRY: TBD. @@ -1445,17 +1506,18 @@ bool BitcodeReader::ParseModule(bool Resume) { std::vector GCTable; // Read all the records for this module. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of module block"); + while (1) { + BitstreamEntry Entry = Stream.advance(); + switch (Entry.Kind) { + case BitstreamEntry::Error: + Error("malformed module block"); + return true; + case BitstreamEntry::EndBlock: return GlobalCleanup(); - } - if (Code == bitc::ENTER_SUBBLOCK) { - switch (Stream.ReadSubBlockID()) { + case BitstreamEntry::SubBlock: + switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return Error("Malformed block record"); @@ -1468,6 +1530,10 @@ bool BitcodeReader::ParseModule(bool Resume) { if (ParseAttributeBlock()) return true; break; + case bitc::PARAMATTR_GROUP_BLOCK_ID: + if (ParseAttributeGroupBlock()) + return true; + break; case bitc::TYPE_BLOCK_ID_NEW: if (ParseTypeTable()) return true; @@ -1514,15 +1580,15 @@ bool BitcodeReader::ParseModule(bool Resume) { break; } continue; - } - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } + // Read a record. - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. 
case bitc::MODULE_CODE_VERSION: { // VERSION: [version#] if (Record.size() < 1) @@ -1562,10 +1628,11 @@ bool BitcodeReader::ParseModule(bool Resume) { break; } case bitc::MODULE_CODE_DEPLIB: { // DEPLIB: [strchr x N] + // FIXME: Remove in 4.0. std::string S; if (ConvertToString(Record, 0, S)) return Error("Invalid MODULE_CODE_DEPLIB record"); - TheModule->addLibrary(S); + // Ignore value. break; } case bitc::MODULE_CODE_SECTIONNAME: { // SECTIONNAME: [strchr x N] @@ -1616,9 +1683,13 @@ bool BitcodeReader::ParseModule(bool Resume) { if (Record.size() > 8) UnnamedAddr = Record[8]; + bool ExternallyInitialized = false; + if (Record.size() > 9) + ExternallyInitialized = Record[9]; + GlobalVariable *NewGV = new GlobalVariable(*TheModule, Ty, isConstant, Linkage, 0, "", 0, - TLM, AddressSpace); + TLM, AddressSpace, ExternallyInitialized); NewGV->setAlignment(Alignment); if (!Section.empty()) NewGV->setSection(Section); @@ -1709,8 +1780,6 @@ bool BitcodeReader::ParseModule(bool Resume) { } Record.clear(); } - - return Error("Premature end of bitstream"); } bool BitcodeReader::ParseBitcodeInto(Module *M) { @@ -1729,47 +1798,55 @@ bool BitcodeReader::ParseBitcodeInto(Module *M) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); + while (1) { + if (Stream.AtEndOfStream()) + return false; + + BitstreamEntry Entry = + Stream.advance(BitstreamCursor::AF_DontAutoprocessAbbrevs); - if (Code != bitc::ENTER_SUBBLOCK) { + switch (Entry.Kind) { + case BitstreamEntry::Error: + Error("malformed module file"); + return true; + case BitstreamEntry::EndBlock: + return false; + + case BitstreamEntry::SubBlock: + switch (Entry.ID) { + case bitc::BLOCKINFO_BLOCK_ID: + if (Stream.ReadBlockInfoBlock()) + return Error("Malformed BlockInfoBlock"); + break; + case bitc::MODULE_BLOCK_ID: + // Reject multiple MODULE_BLOCK's in a single bitstream. + if (TheModule) + return Error("Multiple MODULE_BLOCKs in same stream"); + TheModule = M; + if (ParseModule(false)) + return true; + if (LazyStreamer) return false; + break; + default: + if (Stream.SkipBlock()) + return Error("Malformed block record"); + break; + } + continue; + case BitstreamEntry::Record: + // There should be no records in the top-level of blocks. - // The ranlib in xcode 4 will align archive members by appending newlines + // The ranlib in Xcode 4 will align archive members by appending newlines // to the end of them. If this file size is a multiple of 4 but not 8, we // have to read and ignore these final 4 bytes :-( - if (Stream.GetAbbrevIDWidth() == 2 && Code == 2 && + if (Stream.getAbbrevIDWidth() == 2 && Entry.ID == 2 && Stream.Read(6) == 2 && Stream.Read(24) == 0xa0a0a && Stream.AtEndOfStream()) return false; return Error("Invalid record at top-level"); } - - unsigned BlockID = Stream.ReadSubBlockID(); - - // We only know the MODULE subblock ID. - switch (BlockID) { - case bitc::BLOCKINFO_BLOCK_ID: - if (Stream.ReadBlockInfoBlock()) - return Error("Malformed BlockInfoBlock"); - break; - case bitc::MODULE_BLOCK_ID: - // Reject multiple MODULE_BLOCK's in a single bitstream. 
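// --- Illustrative sketch (not part of the patch): the new
// 'externally_initialized' flag carried in slot 9 of MODULE_CODE_GLOBALVAR,
// passed through the GlobalVariable constructor overload used above. The
// type, linkage, and name here are arbitrary example values.
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/Module.h"

static llvm::GlobalVariable *makeExtInitGlobal(llvm::Module &M) {
  using namespace llvm;
  LLVMContext &Ctx = M.getContext();
  return new GlobalVariable(M, Type::getInt32Ty(Ctx), /*isConstant=*/false,
                            GlobalValue::ExternalLinkage, /*Initializer=*/0,
                            "g", /*InsertBefore=*/0,
                            GlobalVariable::NotThreadLocal, /*AddressSpace=*/0,
                            /*isExternallyInitialized=*/true);
}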
- if (TheModule) - return Error("Multiple MODULE_BLOCKs in same stream"); - TheModule = M; - if (ParseModule(false)) - return true; - if (LazyStreamer) return false; - break; - default: - if (Stream.SkipBlock()) - return Error("Malformed block record"); - break; - } } - - return false; } bool BitcodeReader::ParseModuleTriple(std::string &Triple) { @@ -1779,32 +1856,22 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { SmallVector Record; // Read all the records for this module. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of module block"); + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. + case BitstreamEntry::Error: + return Error("malformed module block"); + case BitstreamEntry::EndBlock: return false; - } - - if (Code == bitc::ENTER_SUBBLOCK) { - switch (Stream.ReadSubBlockID()) { - default: // Skip unknown content. - if (Stream.SkipBlock()) - return Error("Malformed block record"); - break; - } - continue; - } - - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: break; // Default behavior, ignore unknown content. case bitc::MODULE_CODE_TRIPLE: { // TRIPLE: [strchr x N] std::string S; @@ -1816,8 +1883,6 @@ bool BitcodeReader::ParseModuleTriple(std::string &Triple) { } Record.clear(); } - - return Error("Premature end of bitstream"); } bool BitcodeReader::ParseTriple(std::string &Triple) { @@ -1834,28 +1899,32 @@ bool BitcodeReader::ParseTriple(std::string &Triple) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. - while (!Stream.AtEndOfStream()) { - unsigned Code = Stream.ReadCode(); + while (1) { + BitstreamEntry Entry = Stream.advance(); - if (Code != bitc::ENTER_SUBBLOCK) - return Error("Invalid record at top-level"); + switch (Entry.Kind) { + case BitstreamEntry::Error: + Error("malformed module file"); + return true; + case BitstreamEntry::EndBlock: + return false; - unsigned BlockID = Stream.ReadSubBlockID(); + case BitstreamEntry::SubBlock: + if (Entry.ID == bitc::MODULE_BLOCK_ID) + return ParseModuleTriple(Triple); - // We only know the MODULE subblock ID. - switch (BlockID) { - case bitc::MODULE_BLOCK_ID: - if (ParseModuleTriple(Triple)) + // Ignore other sub-blocks. + if (Stream.SkipBlock()) { + Error("malformed block record in AST file"); return true; - break; - default: - if (Stream.SkipBlock()) - return Error("Malformed block record"); - break; + } + continue; + + case BitstreamEntry::Record: + Stream.skipRecord(Entry.ID); + continue; } } - - return false; } /// ParseMetadataAttachment - Parse metadata attachments. @@ -1864,20 +1933,23 @@ bool BitcodeReader::ParseMetadataAttachment() { return Error("Malformed block record"); SmallVector Record; - while(1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of PARAMATTR block"); + while (1) { + BitstreamEntry Entry = Stream.advanceSkippingSubblocks(); + + switch (Entry.Kind) { + case BitstreamEntry::SubBlock: // Handled for us already. 
+ case BitstreamEntry::Error: + return Error("malformed metadata block"); + case BitstreamEntry::EndBlock: + return false; + case BitstreamEntry::Record: + // The interesting case. break; } - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; - } + // Read a metadata attachment record. Record.clear(); - switch (Stream.ReadRecord(Code, Record)) { + switch (Stream.readRecord(Entry.ID, Record)) { default: // Default behavior: ignore. break; case bitc::METADATA_ATTACHMENT: { @@ -1898,7 +1970,6 @@ bool BitcodeReader::ParseMetadataAttachment() { } } } - return false; } /// ParseFunctionBody - Lazily parse the specified function body block. @@ -1919,19 +1990,20 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned CurBBNo = 0; DebugLoc LastLoc; - + // Read all the records. SmallVector Record; while (1) { - unsigned Code = Stream.ReadCode(); - if (Code == bitc::END_BLOCK) { - if (Stream.ReadBlockEnd()) - return Error("Error at end of function block"); - break; - } + BitstreamEntry Entry = Stream.advance(); + + switch (Entry.Kind) { + case BitstreamEntry::Error: + return Error("Bitcode error in function block"); + case BitstreamEntry::EndBlock: + goto OutOfRecordLoop; - if (Code == bitc::ENTER_SUBBLOCK) { - switch (Stream.ReadSubBlockID()) { + case BitstreamEntry::SubBlock: + switch (Entry.ID) { default: // Skip unknown content. if (Stream.SkipBlock()) return Error("Malformed block record"); @@ -1951,17 +2023,16 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } continue; - } - if (Code == bitc::DEFINE_ABBREV) { - Stream.ReadAbbrevRecord(); - continue; + case BitstreamEntry::Record: + // The interesting case. + break; } // Read a record. Record.clear(); Instruction *I = 0; - unsigned BitCode = Stream.ReadRecord(Code, Record); + unsigned BitCode = Stream.readRecord(Entry.ID, Record); switch (BitCode) { default: // Default behavior: reject return Error("Unknown instruction"); @@ -1974,24 +2045,24 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { FunctionBBs[i] = BasicBlock::Create(Context, "", F); CurBB = FunctionBBs[0]; continue; - + case bitc::FUNC_CODE_DEBUG_LOC_AGAIN: // DEBUG_LOC_AGAIN // This record indicates that the last instruction is at the same // location as the previous instruction with a location. I = 0; - + // Get the last instruction emitted. if (CurBB && !CurBB->empty()) I = &CurBB->back(); else if (CurBBNo && FunctionBBs[CurBBNo-1] && !FunctionBBs[CurBBNo-1]->empty()) I = &FunctionBBs[CurBBNo-1]->back(); - + if (I == 0) return Error("Invalid DEBUG_LOC_AGAIN record"); I->setDebugLoc(LastLoc); I = 0; continue; - + case bitc::FUNC_CODE_DEBUG_LOC: { // DEBUG_LOC: [line, col, scope, ia] I = 0; // Get the last instruction emitted. 
if (CurBB && !CurBB->empty()) @@ -2001,10 +2072,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = &FunctionBBs[CurBBNo-1]->back(); if (I == 0 || Record.size() < 4) return Error("Invalid FUNC_CODE_DEBUG_LOC record"); - + unsigned Line = Record[0], Col = Record[1]; unsigned ScopeID = Record[2], IAID = Record[3]; - + MDNode *Scope = 0, *IA = 0; if (ScopeID) Scope = cast(MDValueList.getValueFwdRef(ScopeID-1)); if (IAID) IA = cast(MDValueList.getValueFwdRef(IAID-1)); @@ -2041,7 +2112,22 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { Opc == Instruction::AShr) { if (Record[OpNum] & (1 << bitc::PEO_EXACT)) cast(I)->setIsExact(true); + } else if (isa(I)) { + FastMathFlags FMF; + if (0 != (Record[OpNum] & FastMathFlags::UnsafeAlgebra)) + FMF.setUnsafeAlgebra(); + if (0 != (Record[OpNum] & FastMathFlags::NoNaNs)) + FMF.setNoNaNs(); + if (0 != (Record[OpNum] & FastMathFlags::NoInfs)) + FMF.setNoInfs(); + if (0 != (Record[OpNum] & FastMathFlags::NoSignedZeros)) + FMF.setNoSignedZeros(); + if (0 != (Record[OpNum] & FastMathFlags::AllowReciprocal)) + FMF.setAllowReciprocal(); + if (FMF.any()) + I->setFastMathFlags(FMF); } + } break; } @@ -2272,10 +2358,10 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { break; } case bitc::FUNC_CODE_INST_SWITCH: { // SWITCH: [opty, op0, op1, ...] - // Check magic + // Check magic if ((Record[0] >> 16) == SWITCH_INST_MAGIC) { // New SwitchInst format with case ranges. - + Type *OpTy = getTypeByID(Record[1]); unsigned ValueBitWidth = cast(OpTy)->getBitWidth(); @@ -2285,17 +2371,17 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { return Error("Invalid SWITCH record"); unsigned NumCases = Record[4]; - + SwitchInst *SI = SwitchInst::Create(Cond, Default, NumCases); InstructionList.push_back(SI); - + unsigned CurIdx = 5; for (unsigned i = 0; i != NumCases; ++i) { IntegersSubsetToBB CaseBuilder; unsigned NumItems = Record[CurIdx++]; for (unsigned ci = 0; ci != NumItems; ++ci) { bool isSingleNumber = Record[CurIdx++]; - + APInt Low; unsigned ActiveWords = 1; if (ValueBitWidth > 64) @@ -2311,7 +2397,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { APInt High = ReadWideAPInt(makeArrayRef(&Record[CurIdx], ActiveWords), ValueBitWidth); - + CaseBuilder.add(IntItem::fromType(OpTy, Low), IntItem::fromType(OpTy, High)); CurIdx += ActiveWords; @@ -2319,7 +2405,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { CaseBuilder.add(IntItem::fromType(OpTy, Low)); } BasicBlock *DestBB = getBasicBlock(Record[CurIdx++]); - IntegersSubset Case = CaseBuilder.getCase(); + IntegersSubset Case = CaseBuilder.getCase(); SI->addCase(Case, DestBB); } uint16_t Hash = SI->hash(); @@ -2328,9 +2414,9 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = SI; break; } - + // Old SwitchInst format without case ranges. - + if (Record.size() < 3 || (Record.size() & 1) == 0) return Error("Invalid SWITCH record"); Type *OpTy = getTypeByID(Record[0]); @@ -2375,11 +2461,11 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { I = IBI; break; } - + case bitc::FUNC_CODE_INST_INVOKE: { // INVOKE: [attrs, cc, normBB, unwindBB, fnty, op0,op1,op2, ...] 
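// --- Illustrative sketch (not part of the patch): unpacking the optional
// flags word of a floating-point FUNC_CODE_INST_BINOP record into
// FastMathFlags, mirroring the decode logic above with the remaining flag
// bits omitted for brevity.
#include "llvm/IR/Operator.h"
#include <stdint.h>

static llvm::FastMathFlags decodeExampleFlags(uint64_t FlagsWord) {
  llvm::FastMathFlags FMF;
  if (FlagsWord & llvm::FastMathFlags::UnsafeAlgebra)
    FMF.setUnsafeAlgebra();
  if (FlagsWord & llvm::FastMathFlags::NoNaNs)
    FMF.setNoNaNs();
  if (FlagsWord & llvm::FastMathFlags::NoSignedZeros)
    FMF.setNoSignedZeros();
  return FMF;
}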
if (Record.size() < 4) return Error("Invalid INVOKE record"); - AttrListPtr PAL = getAttributes(Record[0]); + AttributeSet PAL = getAttributes(Record[0]); unsigned CCInfo = Record[1]; BasicBlock *NormalBB = getBasicBlock(Record[2]); BasicBlock *UnwindBB = getBasicBlock(Record[3]); @@ -2534,7 +2620,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (getValueTypePair(Record, OpNum, NextValueNo, Op) || OpNum+4 != Record.size()) return Error("Invalid LOADATOMIC record"); - + AtomicOrdering Ordering = GetDecodedOrdering(Record[OpNum+2]); if (Ordering == NotAtomic || Ordering == Release || @@ -2644,7 +2730,7 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { if (Record.size() < 3) return Error("Invalid CALL record"); - AttrListPtr PAL = getAttributes(Record[0]); + AttributeSet PAL = getAttributes(Record[0]); unsigned CCInfo = Record[1]; unsigned OpNum = 2; @@ -2723,6 +2809,8 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { ValueList.AssignValue(I, NextValueNo++); } +OutOfRecordLoop: + // Check the function list for unresolved values. if (Argument *A = dyn_cast(ValueList.back())) { if (A->getParent() == 0) { @@ -2750,15 +2838,15 @@ bool BitcodeReader::ParseFunctionBody(Function *F) { unsigned BlockIdx = RefList[i].first; if (BlockIdx >= FunctionBBs.size()) return Error("Invalid blockaddress block #"); - + GlobalVariable *FwdRef = RefList[i].second; FwdRef->replaceAllUsesWith(BlockAddress::get(F, FunctionBBs[BlockIdx])); FwdRef->eraseFromParent(); } - + BlockAddrFwdRefs.erase(BAFRI); } - + // Trim the value list down to the size it was before we parsed this function. ValueList.shrinkTo(ModuleValueListSize); MDValueList.shrinkTo(ModuleMDValueListSize); diff --git a/lib/Bitcode/Reader/BitcodeReader.h b/lib/Bitcode/Reader/BitcodeReader.h index 3d5c0eb4def4..28674eb14ef2 100644 --- a/lib/Bitcode/Reader/BitcodeReader.h +++ b/lib/Bitcode/Reader/BitcodeReader.h @@ -14,27 +14,27 @@ #ifndef BITCODE_READER_H #define BITCODE_READER_H -#include "llvm/GVMaterializer.h" -#include "llvm/Attributes.h" -#include "llvm/Type.h" -#include "llvm/OperandTraits.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/Bitcode/BitstreamReader.h" #include "llvm/Bitcode/LLVMBitCodes.h" +#include "llvm/GVMaterializer.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/OperandTraits.h" +#include "llvm/IR/Type.h" #include "llvm/Support/ValueHandle.h" -#include "llvm/ADT/DenseMap.h" #include namespace llvm { class MemoryBuffer; class LLVMContext; - + //===----------------------------------------------------------------------===// // BitcodeReaderValueList Class //===----------------------------------------------------------------------===// class BitcodeReaderValueList { std::vector ValuePtrs; - + /// ResolveConstants - As we resolve forward-referenced constants, we add /// information about them to this vector. This allows us to resolve them in /// bulk instead of resolving each reference at a time. 
See the code in @@ -57,17 +57,17 @@ public: void push_back(Value *V) { ValuePtrs.push_back(V); } - + void clear() { assert(ResolveConstants.empty() && "Constants not resolved?"); ValuePtrs.clear(); } - + Value *operator[](unsigned i) const { assert(i < ValuePtrs.size()); return ValuePtrs[i]; } - + Value *back() const { return ValuePtrs.back(); } void pop_back() { ValuePtrs.pop_back(); } bool empty() const { return ValuePtrs.empty(); } @@ -75,12 +75,12 @@ public: assert(N <= size() && "Invalid shrinkTo request!"); ValuePtrs.resize(N); } - + Constant *getConstantFwdRef(unsigned Idx, Type *Ty); Value *getValueFwdRef(unsigned Idx, Type *Ty); - + void AssignValue(Value *V, unsigned Idx); - + /// ResolveConstantForwardRefs - Once all constants are read, this method bulk /// resolves any forward references. void ResolveConstantForwardRefs(); @@ -93,7 +93,7 @@ public: class BitcodeReaderMDValueList { std::vector MDValuePtrs; - + LLVMContext &Context; public: BitcodeReaderMDValueList(LLVMContext& C) : Context(C) {} @@ -106,12 +106,12 @@ public: Value *back() const { return MDValuePtrs.back(); } void pop_back() { MDValuePtrs.pop_back(); } bool empty() const { return MDValuePtrs.empty(); } - + Value *operator[](unsigned i) const { assert(i < MDValuePtrs.size()); return MDValuePtrs[i]; } - + void shrinkTo(unsigned N) { assert(N <= size() && "Invalid shrinkTo request!"); MDValuePtrs.resize(N); @@ -131,9 +131,9 @@ class BitcodeReader : public GVMaterializer { DataStreamer *LazyStreamer; uint64_t NextUnreadBit; bool SeenValueSymbolTable; - + const char *ErrorString; - + std::vector TypeList; BitcodeReaderValueList ValueList; BitcodeReaderMDValueList MDValueList; @@ -142,38 +142,41 @@ class BitcodeReader : public GVMaterializer { std::vector > GlobalInits; std::vector > AliasInits; - + /// MAttributes - The set of attributes by index. Index zero in the /// file is for null, and is thus not represented here. As such all indices /// are off by one. - std::vector MAttributes; - + std::vector MAttributes; + + /// \brief The set of attribute groups. + std::map MAttributeGroups; + /// FunctionBBs - While parsing a function body, this is a list of the basic /// blocks for the function. std::vector FunctionBBs; - + // When reading the module header, this list is populated with functions that // have bodies later in the file. std::vector FunctionsWithBodies; - // When intrinsic functions are encountered which require upgrading they are + // When intrinsic functions are encountered which require upgrading they are // stored here with their replacement function. typedef std::vector > UpgradedIntrinsicMap; UpgradedIntrinsicMap UpgradedIntrinsics; // Map the bitcode's custom MDKind ID to the Module's MDKind ID. DenseMap MDKindMap; - + // Several operations happen after the module header has been read, but // before function bodies are processed. This keeps track of whether // we've done this yet. bool SeenFirstFunctionBody; - + /// DeferredFunctionInfo - When function bodies are initially scanned, this /// map contains info about where to find deferred function body in the /// stream. DenseMap DeferredFunctionInfo; - + /// BlockAddrFwdRefs - These are blockaddr references to basic blocks. These /// are resolved lazily when functions are loaded. typedef std::pair BlockAddrRefTy; @@ -208,11 +211,11 @@ public: void materializeForwardReferencedFunctions(); void FreeState(); - + /// setBufferOwned - If this is true, the reader will destroy the MemoryBuffer /// when the reader is destroyed. 
void setBufferOwned(bool Owned) { BufferOwned = Owned; } - + virtual bool isMaterializable(const GlobalValue *GV) const; virtual bool isDematerializable(const GlobalValue *GV) const; virtual bool Materialize(GlobalValue *GV, std::string *ErrInfo = 0); @@ -224,7 +227,7 @@ public: return true; } const char *getErrorString() const { return ErrorString; } - + /// @brief Main interface to parsing a bitcode buffer. /// @returns true if an error occurred. bool ParseBitcodeInto(Module *M); @@ -246,12 +249,12 @@ private: if (ID >= FunctionBBs.size()) return 0; // Invalid ID return FunctionBBs[ID]; } - AttrListPtr getAttributes(unsigned i) const { + AttributeSet getAttributes(unsigned i) const { if (i-1 < MAttributes.size()) return MAttributes[i-1]; - return AttrListPtr(); + return AttributeSet(); } - + /// getValueTypePair - Read a value/type pair out of the specified record from /// slot 'Slot'. Increment Slot past the number of slots used in the record. /// Return true on failure. @@ -320,6 +323,7 @@ private: bool ParseModule(bool Resume); bool ParseAttributeBlock(); + bool ParseAttributeGroupBlock(); bool ParseTypeTable(); bool ParseTypeTableBody(); @@ -339,7 +343,7 @@ private: bool FindFunctionInStream(Function *F, DenseMap::iterator DeferredFunctionInfoIterator); }; - + } // End llvm namespace #endif diff --git a/lib/Bitcode/Reader/BitstreamReader.cpp b/lib/Bitcode/Reader/BitstreamReader.cpp new file mode 100644 index 000000000000..9dafe2a03670 --- /dev/null +++ b/lib/Bitcode/Reader/BitstreamReader.cpp @@ -0,0 +1,371 @@ +//===- BitstreamReader.cpp - BitstreamReader implementation ---------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Bitcode/BitstreamReader.h" + +using namespace llvm; + +//===----------------------------------------------------------------------===// +// BitstreamCursor implementation +//===----------------------------------------------------------------------===// + +void BitstreamCursor::operator=(const BitstreamCursor &RHS) { + freeState(); + + BitStream = RHS.BitStream; + NextChar = RHS.NextChar; + CurWord = RHS.CurWord; + BitsInCurWord = RHS.BitsInCurWord; + CurCodeSize = RHS.CurCodeSize; + + // Copy abbreviations, and bump ref counts. + CurAbbrevs = RHS.CurAbbrevs; + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->addRef(); + + // Copy block scope and bump ref counts. + BlockScope = RHS.BlockScope; + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { + std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) + Abbrevs[i]->addRef(); + } +} + +void BitstreamCursor::freeState() { + // Free all the Abbrevs. + for (size_t i = 0, e = CurAbbrevs.size(); i != e; ++i) + CurAbbrevs[i]->dropRef(); + CurAbbrevs.clear(); + + // Free all the Abbrevs in the block scope. + for (size_t S = 0, e = BlockScope.size(); S != e; ++S) { + std::vector &Abbrevs = BlockScope[S].PrevAbbrevs; + for (size_t i = 0, e = Abbrevs.size(); i != e; ++i) + Abbrevs[i]->dropRef(); + } + BlockScope.clear(); +} + +/// EnterSubBlock - Having read the ENTER_SUBBLOCK abbrevid, enter +/// the block, and return true if the block has an error. +bool BitstreamCursor::EnterSubBlock(unsigned BlockID, unsigned *NumWordsP) { + // Save the current block's state on BlockScope. 
+ BlockScope.push_back(Block(CurCodeSize)); + BlockScope.back().PrevAbbrevs.swap(CurAbbrevs); + + // Add the abbrevs specific to this block to the CurAbbrevs list. + if (const BitstreamReader::BlockInfo *Info = + BitStream->getBlockInfo(BlockID)) { + for (size_t i = 0, e = Info->Abbrevs.size(); i != e; ++i) { + CurAbbrevs.push_back(Info->Abbrevs[i]); + CurAbbrevs.back()->addRef(); + } + } + + // Get the codesize of this block. + CurCodeSize = ReadVBR(bitc::CodeLenWidth); + SkipToFourByteBoundary(); + unsigned NumWords = Read(bitc::BlockSizeWidth); + if (NumWordsP) *NumWordsP = NumWords; + + // Validate that this block is sane. + if (CurCodeSize == 0 || AtEndOfStream()) + return true; + + return false; +} + +void BitstreamCursor::readAbbreviatedLiteral(const BitCodeAbbrevOp &Op, + SmallVectorImpl &Vals) { + assert(Op.isLiteral() && "Not a literal"); + // If the abbrev specifies the literal value to use, use it. + Vals.push_back(Op.getLiteralValue()); +} + +void BitstreamCursor::readAbbreviatedField(const BitCodeAbbrevOp &Op, + SmallVectorImpl &Vals) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + case BitCodeAbbrevOp::Array: + case BitCodeAbbrevOp::Blob: + assert(0 && "Should not reach here"); + case BitCodeAbbrevOp::Fixed: + Vals.push_back(Read((unsigned)Op.getEncodingData())); + break; + case BitCodeAbbrevOp::VBR: + Vals.push_back(ReadVBR64((unsigned)Op.getEncodingData())); + break; + case BitCodeAbbrevOp::Char6: + Vals.push_back(BitCodeAbbrevOp::DecodeChar6(Read(6))); + break; + } +} + +void BitstreamCursor::skipAbbreviatedField(const BitCodeAbbrevOp &Op) { + assert(!Op.isLiteral() && "Use ReadAbbreviatedLiteral for literals!"); + + // Decode the value as we are commanded. + switch (Op.getEncoding()) { + case BitCodeAbbrevOp::Array: + case BitCodeAbbrevOp::Blob: + assert(0 && "Should not reach here"); + case BitCodeAbbrevOp::Fixed: + (void)Read((unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::VBR: + (void)ReadVBR64((unsigned)Op.getEncodingData()); + break; + case BitCodeAbbrevOp::Char6: + (void)Read(6); + break; + } +} + + + +/// skipRecord - Read the current record and discard it. +void BitstreamCursor::skipRecord(unsigned AbbrevID) { + // Skip unabbreviated records by reading past their entries. + if (AbbrevID == bitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + (void)Code; + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + (void)ReadVBR64(6); + return; + } + + const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) + continue; + + if (Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::Blob) { + skipAbbreviatedField(Op); + continue; + } + + if (Op.getEncoding() == BitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + skipAbbreviatedField(EltEnc); + continue; + } + + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); + // Blob case. Read the number of bytes as a vbr6. 
+ unsigned NumElts = ReadVBR(6); + SkipToFourByteBoundary(); // 32-bit alignment + + // Figure out where the end of this blob will be including tail padding. + size_t NewEnd = GetCurrentBitNo()+((NumElts+3)&~3)*8; + + // If this would read off the end of the bitcode file, just set the + // record to empty and return. + if (!canSkipToPos(NewEnd/8)) { + NextChar = BitStream->getBitcodeBytes().getExtent(); + break; + } + + // Skip over the blob. + JumpToBit(NewEnd); + } +} + +unsigned BitstreamCursor::readRecord(unsigned AbbrevID, + SmallVectorImpl &Vals, + StringRef *Blob) { + if (AbbrevID == bitc::UNABBREV_RECORD) { + unsigned Code = ReadVBR(6); + unsigned NumElts = ReadVBR(6); + for (unsigned i = 0; i != NumElts; ++i) + Vals.push_back(ReadVBR64(6)); + return Code; + } + + const BitCodeAbbrev *Abbv = getAbbrev(AbbrevID); + + for (unsigned i = 0, e = Abbv->getNumOperandInfos(); i != e; ++i) { + const BitCodeAbbrevOp &Op = Abbv->getOperandInfo(i); + if (Op.isLiteral()) { + readAbbreviatedLiteral(Op, Vals); + continue; + } + + if (Op.getEncoding() != BitCodeAbbrevOp::Array && + Op.getEncoding() != BitCodeAbbrevOp::Blob) { + readAbbreviatedField(Op, Vals); + continue; + } + + if (Op.getEncoding() == BitCodeAbbrevOp::Array) { + // Array case. Read the number of elements as a vbr6. + unsigned NumElts = ReadVBR(6); + + // Get the element encoding. + assert(i+2 == e && "array op not second to last?"); + const BitCodeAbbrevOp &EltEnc = Abbv->getOperandInfo(++i); + + // Read all the elements. + for (; NumElts; --NumElts) + readAbbreviatedField(EltEnc, Vals); + continue; + } + + assert(Op.getEncoding() == BitCodeAbbrevOp::Blob); + // Blob case. Read the number of bytes as a vbr6. + unsigned NumElts = ReadVBR(6); + SkipToFourByteBoundary(); // 32-bit alignment + + // Figure out where the end of this blob will be including tail padding. + size_t CurBitPos = GetCurrentBitNo(); + size_t NewEnd = CurBitPos+((NumElts+3)&~3)*8; + + // If this would read off the end of the bitcode file, just set the + // record to empty and return. + if (!canSkipToPos(NewEnd/8)) { + Vals.append(NumElts, 0); + NextChar = BitStream->getBitcodeBytes().getExtent(); + break; + } + + // Otherwise, inform the streamer that we need these bytes in memory. + const char *Ptr = (const char*) + BitStream->getBitcodeBytes().getPointer(CurBitPos/8, NumElts); + + // If we can return a reference to the data, do so to avoid copying it. + if (Blob) { + *Blob = StringRef(Ptr, NumElts); + } else { + // Otherwise, unpack into Vals with zero extension. + for (; NumElts; --NumElts) + Vals.push_back((unsigned char)*Ptr++); + } + // Skip over tail padding. + JumpToBit(NewEnd); + } + + unsigned Code = (unsigned)Vals[0]; + Vals.erase(Vals.begin()); + return Code; +} + + +void BitstreamCursor::ReadAbbrevRecord() { + BitCodeAbbrev *Abbv = new BitCodeAbbrev(); + unsigned NumOpInfo = ReadVBR(5); + for (unsigned i = 0; i != NumOpInfo; ++i) { + bool IsLiteral = Read(1) ? true : false; + if (IsLiteral) { + Abbv->Add(BitCodeAbbrevOp(ReadVBR64(8))); + continue; + } + + BitCodeAbbrevOp::Encoding E = (BitCodeAbbrevOp::Encoding)Read(3); + if (BitCodeAbbrevOp::hasEncodingData(E)) { + unsigned Data = ReadVBR64(5); + + // As a special case, handle fixed(0) (i.e., a fixed field with zero bits) + // and vbr(0) as a literal zero. This is decoded the same way, and avoids + // a slow path in Read() to have to handle reading zero bits. 
+ if ((E == BitCodeAbbrevOp::Fixed || E == BitCodeAbbrevOp::VBR) && + Data == 0) { + Abbv->Add(BitCodeAbbrevOp(0)); + continue; + } + + Abbv->Add(BitCodeAbbrevOp(E, Data)); + } else + Abbv->Add(BitCodeAbbrevOp(E)); + } + CurAbbrevs.push_back(Abbv); +} + +bool BitstreamCursor::ReadBlockInfoBlock() { + // If this is the second stream to get to the block info block, skip it. + if (BitStream->hasBlockInfoRecords()) + return SkipBlock(); + + if (EnterSubBlock(bitc::BLOCKINFO_BLOCK_ID)) return true; + + SmallVector Record; + BitstreamReader::BlockInfo *CurBlockInfo = 0; + + // Read all the records for this module. + while (1) { + BitstreamEntry Entry = advanceSkippingSubblocks(AF_DontAutoprocessAbbrevs); + + switch (Entry.Kind) { + case llvm::BitstreamEntry::SubBlock: // Handled for us already. + case llvm::BitstreamEntry::Error: + return true; + case llvm::BitstreamEntry::EndBlock: + return false; + case llvm::BitstreamEntry::Record: + // The interesting case. + break; + } + + // Read abbrev records, associate them with CurBID. + if (Entry.ID == bitc::DEFINE_ABBREV) { + if (!CurBlockInfo) return true; + ReadAbbrevRecord(); + + // ReadAbbrevRecord installs the abbrev in CurAbbrevs. Move it to the + // appropriate BlockInfo. + BitCodeAbbrev *Abbv = CurAbbrevs.back(); + CurAbbrevs.pop_back(); + CurBlockInfo->Abbrevs.push_back(Abbv); + continue; + } + + // Read a record. + Record.clear(); + switch (readRecord(Entry.ID, Record)) { + default: break; // Default behavior, ignore unknown content. + case bitc::BLOCKINFO_CODE_SETBID: + if (Record.size() < 1) return true; + CurBlockInfo = &BitStream->getOrCreateBlockInfo((unsigned)Record[0]); + break; + case bitc::BLOCKINFO_CODE_BLOCKNAME: { + if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. + std::string Name; + for (unsigned i = 0, e = Record.size(); i != e; ++i) + Name += (char)Record[i]; + CurBlockInfo->Name = Name; + break; + } + case bitc::BLOCKINFO_CODE_SETRECORDNAME: { + if (!CurBlockInfo) return true; + if (BitStream->isIgnoringBlockInfoNames()) break; // Ignore name. 
+ std::string Name; + for (unsigned i = 1, e = Record.size(); i != e; ++i) + Name += (char)Record[i]; + CurBlockInfo->RecordNames.push_back(std::make_pair((unsigned)Record[0], + Name)); + break; + } + } + } +} + diff --git a/lib/Bitcode/Reader/CMakeLists.txt b/lib/Bitcode/Reader/CMakeLists.txt index dfe7e1065c7d..f614c9fd4a03 100644 --- a/lib/Bitcode/Reader/CMakeLists.txt +++ b/lib/Bitcode/Reader/CMakeLists.txt @@ -1,6 +1,7 @@ add_llvm_library(LLVMBitReader BitReader.cpp BitcodeReader.cpp + BitstreamReader.cpp ) add_dependencies(LLVMBitReader intrinsics_gen) diff --git a/lib/Bitcode/Writer/BitWriter.cpp b/lib/Bitcode/Writer/BitWriter.cpp index 428842246331..9f51c35ad92e 100644 --- a/lib/Bitcode/Writer/BitWriter.cpp +++ b/lib/Bitcode/Writer/BitWriter.cpp @@ -17,12 +17,11 @@ using namespace llvm; int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { std::string ErrorInfo; - raw_fd_ostream OS(Path, ErrorInfo, - raw_fd_ostream::F_Binary); - + raw_fd_ostream OS(Path, ErrorInfo, raw_fd_ostream::F_Binary); + if (!ErrorInfo.empty()) return -1; - + WriteBitcodeToFile(unwrap(M), OS); return 0; } @@ -30,7 +29,7 @@ int LLVMWriteBitcodeToFile(LLVMModuleRef M, const char *Path) { int LLVMWriteBitcodeToFD(LLVMModuleRef M, int FD, int ShouldClose, int Unbuffered) { raw_fd_ostream OS(FD, ShouldClose, Unbuffered); - + WriteBitcodeToFile(unwrap(M), OS); return 0; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 60c657ae6dd4..1b73f23e8f60 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -12,22 +12,22 @@ //===----------------------------------------------------------------------===// #include "llvm/Bitcode/ReaderWriter.h" -#include "llvm/Bitcode/BitstreamWriter.h" -#include "llvm/Bitcode/LLVMBitCodes.h" #include "ValueEnumerator.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/Module.h" -#include "llvm/Operator.h" -#include "llvm/ValueSymbolTable.h" #include "llvm/ADT/Triple.h" +#include "llvm/Bitcode/BitstreamWriter.h" +#include "llvm/Bitcode/LLVMBitCodes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Program.h" +#include "llvm/Support/raw_ostream.h" #include #include using namespace llvm; @@ -61,7 +61,7 @@ enum { FUNCTION_INST_RET_VOID_ABBREV, FUNCTION_INST_RET_VAL_ABBREV, FUNCTION_INST_UNREACHABLE_ABBREV, - + // SwitchInst Magic SWITCH_INST_MAGIC = 0x4B5 // May 2012 => 1205 => Hex }; @@ -161,22 +161,66 @@ static void WriteStringRecord(unsigned Code, StringRef Str, Stream.EmitRecord(Code, Vals, AbbrevToUse); } -// Emit information about parameter attributes. 
+static void WriteAttributeGroupTable(const ValueEnumerator &VE, + BitstreamWriter &Stream) { + const std::vector &AttrGrps = VE.getAttributeGroups(); + if (AttrGrps.empty()) return; + + Stream.EnterSubblock(bitc::PARAMATTR_GROUP_BLOCK_ID, 3); + + SmallVector Record; + for (unsigned i = 0, e = AttrGrps.size(); i != e; ++i) { + AttributeSet AS = AttrGrps[i]; + for (unsigned i = 0, e = AS.getNumSlots(); i != e; ++i) { + AttributeSet A = AS.getSlotAttributes(i); + + Record.push_back(VE.getAttributeGroupID(A)); + Record.push_back(AS.getSlotIndex(i)); + + for (AttributeSet::iterator I = AS.begin(0), E = AS.end(0); + I != E; ++I) { + Attribute Attr = *I; + if (Attr.isEnumAttribute()) { + Record.push_back(0); + Record.push_back(Attr.getKindAsEnum()); + } else if (Attr.isAlignAttribute()) { + Record.push_back(1); + Record.push_back(Attr.getKindAsEnum()); + Record.push_back(Attr.getValueAsInt()); + } else { + StringRef Kind = Attr.getKindAsString(); + StringRef Val = Attr.getValueAsString(); + + Record.push_back(Val.empty() ? 3 : 4); + Record.append(Kind.begin(), Kind.end()); + Record.push_back(0); + if (!Val.empty()) { + Record.append(Val.begin(), Val.end()); + Record.push_back(0); + } + } + } + + Stream.EmitRecord(bitc::PARAMATTR_GRP_CODE_ENTRY, Record); + Record.clear(); + } + } + + Stream.ExitBlock(); +} + static void WriteAttributeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { - const std::vector &Attrs = VE.getAttributes(); + const std::vector &Attrs = VE.getAttributes(); if (Attrs.empty()) return; Stream.EnterSubblock(bitc::PARAMATTR_BLOCK_ID, 3); SmallVector Record; for (unsigned i = 0, e = Attrs.size(); i != e; ++i) { - const AttrListPtr &A = Attrs[i]; - for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) { - const AttributeWithIndex &PAWI = A.getSlot(i); - Record.push_back(PAWI.Index); - Record.push_back(Attributes::encodeLLVMAttributesForBitcode(PAWI.Attrs)); - } + const AttributeSet &A = Attrs[i]; + for (unsigned i = 0, e = A.getNumSlots(); i != e; ++i) + Record.push_back(VE.getAttributeGroupID(A.getSlotAttributes(i))); Stream.EmitRecord(bitc::PARAMATTR_CODE_ENTRY, Record); Record.clear(); @@ -234,7 +278,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { Abbv->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, NumBits)); unsigned StructNamedAbbrev = Stream.EmitAbbrev(Abbv); - + // Abbrev for TYPE_CODE_ARRAY. 
Abbv = new BitCodeAbbrev(); Abbv->Add(BitCodeAbbrevOp(bitc::TYPE_CODE_ARRAY)); @@ -256,16 +300,16 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { switch (T->getTypeID()) { default: llvm_unreachable("Unknown type!"); - case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; - case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; - case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; - case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; - case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; - case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; + case Type::VoidTyID: Code = bitc::TYPE_CODE_VOID; break; + case Type::HalfTyID: Code = bitc::TYPE_CODE_HALF; break; + case Type::FloatTyID: Code = bitc::TYPE_CODE_FLOAT; break; + case Type::DoubleTyID: Code = bitc::TYPE_CODE_DOUBLE; break; + case Type::X86_FP80TyID: Code = bitc::TYPE_CODE_X86_FP80; break; + case Type::FP128TyID: Code = bitc::TYPE_CODE_FP128; break; case Type::PPC_FP128TyID: Code = bitc::TYPE_CODE_PPC_FP128; break; - case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; - case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; - case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; + case Type::LabelTyID: Code = bitc::TYPE_CODE_LABEL; break; + case Type::MetadataTyID: Code = bitc::TYPE_CODE_METADATA; break; + case Type::X86_MMXTyID: Code = bitc::TYPE_CODE_X86_MMX; break; case Type::IntegerTyID: // INTEGER: [width] Code = bitc::TYPE_CODE_INTEGER; @@ -300,7 +344,7 @@ static void WriteTypeTable(const ValueEnumerator &VE, BitstreamWriter &Stream) { for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) TypeVals.push_back(VE.getTypeID(*I)); - + if (ST->isLiteral()) { Code = bitc::TYPE_CODE_STRUCT_ANON; AbbrevToUse = StructAnonAbbrev; @@ -392,10 +436,6 @@ static unsigned getEncodedThreadLocalMode(const GlobalVariable *GV) { // descriptors for global variables, and function prototype info. static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, BitstreamWriter &Stream) { - // Emit the list of dependent libraries for the Module. - for (Module::lib_iterator I = M->lib_begin(), E = M->lib_end(); I != E; ++I) - WriteStringRecord(bitc::MODULE_CODE_DEPLIB, *I, 0/*TODO*/, Stream); - // Emit various pieces of data attached to a module. if (!M->getTargetTriple().empty()) WriteStringRecord(bitc::MODULE_CODE_TRIPLE, M->getTargetTriple(), @@ -494,10 +534,11 @@ static void WriteModuleInfo(const Module *M, const ValueEnumerator &VE, Vals.push_back(GV->hasSection() ? 
SectionMap[GV->getSection()] : 0); if (GV->isThreadLocal() || GV->getVisibility() != GlobalValue::DefaultVisibility || - GV->hasUnnamedAddr()) { + GV->hasUnnamedAddr() || GV->isExternallyInitialized()) { Vals.push_back(getEncodedVisibility(GV)); Vals.push_back(getEncodedThreadLocalMode(GV)); Vals.push_back(GV->hasUnnamedAddr()); + Vals.push_back(GV->isExternallyInitialized()); } else { AbbrevToUse = SimpleGVarAbbrev; } @@ -553,6 +594,18 @@ static uint64_t GetOptimizationFlags(const Value *V) { dyn_cast(V)) { if (PEO->isExact()) Flags |= 1 << bitc::PEO_EXACT; + } else if (const FPMathOperator *FPMO = + dyn_cast(V)) { + if (FPMO->hasUnsafeAlgebra()) + Flags |= FastMathFlags::UnsafeAlgebra; + if (FPMO->hasNoNaNs()) + Flags |= FastMathFlags::NoNaNs; + if (FPMO->hasNoInfs()) + Flags |= FastMathFlags::NoInfs; + if (FPMO->hasNoSignedZeros()) + Flags |= FastMathFlags::NoSignedZeros; + if (FPMO->hasAllowReciprocal()) + Flags |= FastMathFlags::AllowReciprocal; } return Flags; @@ -658,7 +711,7 @@ static void WriteFunctionLocalMetadata(const Function &F, } WriteMDNode(N, VE, Stream, Record); } - + if (StartedMetadataBlock) Stream.ExitBlock(); } @@ -673,18 +726,18 @@ static void WriteMetadataAttachment(const Function &F, // Write metadata attachments // METADATA_ATTACHMENT - [m x [value, [n x [id, mdnode]]] SmallVector, 4> MDs; - + for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { MDs.clear(); I->getAllMetadataOtherThanDebugLoc(MDs); - + // If no metadata, ignore instruction. if (MDs.empty()) continue; Record.push_back(VE.getInstructionID(I)); - + for (unsigned i = 0, e = MDs.size(); i != e; ++i) { Record.push_back(MDs[i].first); Record.push_back(VE.getValueID(MDs[i].second)); @@ -701,18 +754,18 @@ static void WriteModuleMetadataStore(const Module *M, BitstreamWriter &Stream) { // Write metadata kinds // METADATA_KIND - [n x [id, name]] - SmallVector Names; + SmallVector Names; M->getMDKindNames(Names); - + if (Names.empty()) return; Stream.EnterSubblock(bitc::METADATA_BLOCK_ID, 3); - + for (unsigned MDKindID = 0, e = Names.size(); MDKindID != e; ++MDKindID) { Record.push_back(MDKindID); StringRef KName = Names[MDKindID]; Record.append(KName.begin(), KName.end()); - + Stream.EmitRecord(bitc::METADATA_KIND, Record, 0); Record.clear(); } @@ -743,10 +796,10 @@ static void EmitAPInt(SmallVectorImpl &Vals, // format it is likely that the high bits are going to be zero. // So, we only write the number of active words. 
unsigned NWords = Val.getActiveWords(); - + if (EmitSizeForWideNumbers) Vals.push_back(NWords); - + const uint64_t *RawWords = Val.getRawData(); for (unsigned i = 0; i != NWords; ++i) { emitSignedInt64(Vals, RawWords[i]); @@ -881,12 +934,12 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, if (isCStrChar6) isCStrChar6 = BitCodeAbbrevOp::isChar6(V); } - + if (isCStrChar6) AbbrevToUse = CString6Abbrev; else if (isCStr7) AbbrevToUse = CString7Abbrev; - } else if (const ConstantDataSequential *CDS = + } else if (const ConstantDataSequential *CDS = dyn_cast(C)) { Code = bitc::CST_CODE_DATA; Type *EltTy = CDS->getType()->getElementType(); @@ -1166,7 +1219,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, case Instruction::Br: { Code = bitc::FUNC_CODE_INST_BR; - BranchInst &II = cast(I); + const BranchInst &II = cast(I); Vals.push_back(VE.getValueID(II.getSuccessor(0))); if (II.isConditional()) { Vals.push_back(VE.getValueID(II.getSuccessor(1))); @@ -1179,36 +1232,36 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, // Redefine Vals, since here we need to use 64 bit values // explicitly to store large APInt numbers. SmallVector Vals64; - + Code = bitc::FUNC_CODE_INST_SWITCH; - SwitchInst &SI = cast(I); - - uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16); - Vals64.push_back(SwitchRecordHeader); - + const SwitchInst &SI = cast(I); + + uint32_t SwitchRecordHeader = SI.hash() | (SWITCH_INST_MAGIC << 16); + Vals64.push_back(SwitchRecordHeader); + Vals64.push_back(VE.getTypeID(SI.getCondition()->getType())); pushValue64(SI.getCondition(), InstID, Vals64, VE); Vals64.push_back(VE.getValueID(SI.getDefaultDest())); Vals64.push_back(SI.getNumCases()); - for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); + for (SwitchInst::ConstCaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) { - IntegersSubset& CaseRanges = i.getCaseValueEx(); + const IntegersSubset& CaseRanges = i.getCaseValueEx(); unsigned Code, Abbrev; // will unused. 
- + if (CaseRanges.isSingleNumber()) { Vals64.push_back(1/*NumItems = 1*/); Vals64.push_back(true/*IsSingleNumber = true*/); EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(0), true); } else { - + Vals64.push_back(CaseRanges.getNumItems()); - + if (CaseRanges.isSingleNumbersOnly()) { for (unsigned ri = 0, rn = CaseRanges.getNumItems(); ri != rn; ++ri) { - + Vals64.push_back(true/*IsSingleNumber = true*/); - + EmitAPInt(Vals64, Code, Abbrev, CaseRanges.getSingleNumber(ri), true); } @@ -1217,9 +1270,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, ri != rn; ++ri) { IntegersSubset::Range r = CaseRanges.getItem(ri); bool IsSingleNumber = CaseRanges.isSingleNumber(ri); - + Vals64.push_back(IsSingleNumber); - + EmitAPInt(Vals64, Code, Abbrev, r.getLow(), true); if (!IsSingleNumber) EmitAPInt(Vals64, Code, Abbrev, r.getHigh(), true); @@ -1227,9 +1280,9 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, } Vals64.push_back(VE.getValueID(i.getCaseSuccessor())); } - + Stream.EmitRecord(Code, Vals64, AbbrevToUse); - + // Also do expected action - clear external Vals collection: Vals.clear(); return; @@ -1243,7 +1296,7 @@ static void WriteInstruction(const Instruction &I, unsigned InstID, for (unsigned i = 1, e = I.getNumOperands(); i != e; ++i) Vals.push_back(VE.getValueID(I.getOperand(i))); break; - + case Instruction::Invoke: { const InvokeInst *II = cast(&I); const Value *Callee(II->getCalledValue()); @@ -1502,21 +1555,21 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, unsigned InstID = CstEnd; bool NeedsMetadataAttachment = false; - + DebugLoc LastDL; - + // Finally, emit all the instructions, in order. for (Function::const_iterator BB = F.begin(), E = F.end(); BB != E; ++BB) for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E; ++I) { WriteInstruction(*I, InstID, VE, Stream, Vals); - + if (!I->getType()->isVoidTy()) ++InstID; - + // If the instruction has metadata, write a metadata attachment later. NeedsMetadataAttachment |= I->hasMetadataOtherThanDebugLoc(); - + // If the instruction has a debug location, emit it. DebugLoc DL = I->getDebugLoc(); if (DL.isUnknown()) { @@ -1527,14 +1580,14 @@ static void WriteFunction(const Function &F, ValueEnumerator &VE, } else { MDNode *Scope, *IA; DL.getScopeAndInlinedAt(Scope, IA, I->getContext()); - + Vals.push_back(DL.getLine()); Vals.push_back(DL.getCol()); Vals.push_back(Scope ? VE.getValueID(Scope)+1 : 0); Vals.push_back(IA ? VE.getValueID(IA)+1 : 0); Stream.EmitRecord(bitc::FUNC_CODE_DEBUG_LOC, Vals); Vals.clear(); - + LastDL = DL; } } @@ -1709,7 +1762,7 @@ static void WriteBlockInfo(const ValueEnumerator &VE, BitstreamWriter &Stream) { Stream.ExitBlock(); } -// Sort the Users based on the order in which the reader parses the bitcode +// Sort the Users based on the order in which the reader parses the bitcode // file. static bool bitcodereader_order(const User *lhs, const User *rhs) { // TODO: Implement. @@ -1778,9 +1831,9 @@ static void WriteModuleUseLists(const Module *M, ValueEnumerator &VE, for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); I != E; ++I) I->removeDeadConstantUsers(); - + // Write the global variables. 
- for (Module::const_global_iterator GI = M->global_begin(), + for (Module::const_global_iterator GI = M->global_begin(), GE = M->global_end(); GI != GE; ++GI) { WriteUseList(GI, VE, Stream); @@ -1821,6 +1874,9 @@ static void WriteModule(const Module *M, BitstreamWriter &Stream) { // Emit blockinfo, which defines the standard abbreviations etc. WriteBlockInfo(VE, Stream); + // Emit information about attribute groups. + WriteAttributeGroupTable(VE, Stream); + // Emit information about parameter attributes. WriteAttributeTable(VE, Stream); @@ -1931,7 +1987,7 @@ static void EmitDarwinBCHeaderAndTrailer(SmallVectorImpl &Buffer, /// WriteBitcodeToFile - Write the specified module to the specified output /// stream. void llvm::WriteBitcodeToFile(const Module *M, raw_ostream &Out) { - SmallVector Buffer; + SmallVector Buffer; Buffer.reserve(256*1024); // If this is darwin or another generic macho target, reserve space for the diff --git a/lib/Bitcode/Writer/BitcodeWriterPass.cpp b/lib/Bitcode/Writer/BitcodeWriterPass.cpp index 91e115cba6cc..e5e76e29bd2d 100644 --- a/lib/Bitcode/Writer/BitcodeWriterPass.cpp +++ b/lib/Bitcode/Writer/BitcodeWriterPass.cpp @@ -22,9 +22,9 @@ namespace { static char ID; // Pass identification, replacement for typeid explicit WriteBitcodePass(raw_ostream &o) : ModulePass(ID), OS(o) {} - + const char *getPassName() const { return "Bitcode Writer"; } - + bool runOnModule(Module &M) { WriteBitcodeToFile(&M, OS); return false; diff --git a/lib/Bitcode/Writer/ValueEnumerator.cpp b/lib/Bitcode/Writer/ValueEnumerator.cpp index 1ed9004eb5a1..8bac6da89285 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.cpp +++ b/lib/Bitcode/Writer/ValueEnumerator.cpp @@ -12,20 +12,20 @@ //===----------------------------------------------------------------------===// #include "ValueEnumerator.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/ValueSymbolTable.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/ValueSymbolTable.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include using namespace llvm; -static bool isIntegerValue(const std::pair &V) { - return V.first->getType()->isIntegerTy(); +static bool isIntOrIntVectorValue(const std::pair &V) { + return V.first->getType()->isIntOrIntVectorTy(); } /// ValueEnumerator - Enumerate module-level information. @@ -60,7 +60,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { I != E; ++I) EnumerateValue(I->getAliasee()); - // Insert constants and metadata that are named at module level into the slot + // Insert constants and metadata that are named at module level into the slot // pool so that the module symbol table can refer to them... 
EnumerateValueSymbolTable(M->getValueSymbolTable()); EnumerateNamedMetadata(M); @@ -95,7 +95,7 @@ ValueEnumerator::ValueEnumerator(const Module *M) { I->getAllMetadataOtherThanDebugLoc(MDs); for (unsigned i = 0, e = MDs.size(); i != e; ++i) EnumerateMetadata(MDs[i].second); - + if (!I->getDebugLoc().isUnknown()) { MDNode *Scope, *IA; I->getDebugLoc().getScopeAndInlinedAt(Scope, IA, I->getContext()); @@ -192,10 +192,11 @@ void ValueEnumerator::OptimizeConstants(unsigned CstStart, unsigned CstEnd) { CstSortPredicate P(*this); std::stable_sort(Values.begin()+CstStart, Values.begin()+CstEnd, P); - // Ensure that integer constants are at the start of the constant pool. This - // is important so that GEP structure indices come before gep constant exprs. + // Ensure that integer and vector of integer constants are at the start of the + // constant pool. This is important so that GEP structure indices come before + // gep constant exprs. std::partition(Values.begin()+CstStart, Values.begin()+CstEnd, - isIntegerValue); + isIntOrIntVectorValue); // Rebuild the modified portion of ValueMap. for (; CstStart != CstEnd; ++CstStart) @@ -362,16 +363,16 @@ void ValueEnumerator::EnumerateType(Type *Ty) { if (StructType *STy = dyn_cast(Ty)) if (!STy->isLiteral()) *TypeID = ~0U; - + // Enumerate all of the subtypes before we enumerate this type. This ensures // that the type will be enumerated in an order that can be directly built. for (Type::subtype_iterator I = Ty->subtype_begin(), E = Ty->subtype_end(); I != E; ++I) EnumerateType(*I); - + // Refresh the TypeID pointer in case the table rehashed. TypeID = &TypeMap[Ty]; - + // Check to see if we got the pointer another way. This can happen when // enumerating recursive types that hit the base case deeper than they start. // @@ -379,10 +380,10 @@ void ValueEnumerator::EnumerateType(Type *Ty) { // then emit the definition now that all of its contents are available. if (*TypeID && *TypeID != ~0U) return; - + // Add this type now that its contents are all happily enumerated. Types.push_back(Ty); - + *TypeID = Types.size(); } @@ -390,7 +391,7 @@ void ValueEnumerator::EnumerateType(Type *Ty) { // walk through it, enumerating the types of the constant. void ValueEnumerator::EnumerateOperandType(const Value *V) { EnumerateType(V->getType()); - + if (const Constant *C = dyn_cast(V)) { // If this constant is already enumerated, ignore it, we know its type must // be enumerated. @@ -400,11 +401,11 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { // them. for (unsigned i = 0, e = C->getNumOperands(); i != e; ++i) { const Value *Op = C->getOperand(i); - + // Don't enumerate basic blocks here, this happens as operands to // blockaddress. if (isa(Op)) continue; - + EnumerateOperandType(Op); } @@ -417,14 +418,25 @@ void ValueEnumerator::EnumerateOperandType(const Value *V) { EnumerateMetadata(V); } -void ValueEnumerator::EnumerateAttributes(const AttrListPtr &PAL) { +void ValueEnumerator::EnumerateAttributes(AttributeSet PAL) { if (PAL.isEmpty()) return; // null is always 0. + // Do a lookup. - unsigned &Entry = AttributeMap[PAL.getRawPointer()]; + unsigned &Entry = AttributeMap[PAL]; if (Entry == 0) { // Never saw this before, add it. - Attributes.push_back(PAL); - Entry = Attributes.size(); + Attribute.push_back(PAL); + Entry = Attribute.size(); + } + + // Do lookups for all attribute groups. 
+ for (unsigned i = 0, e = PAL.getNumSlots(); i != e; ++i) { + AttributeSet AS = PAL.getSlotAttributes(i); + unsigned &Entry = AttributeGroupMap[AS]; + if (Entry == 0) { + AttributeGroups.push_back(AS); + Entry = AttributeGroups.size(); + } } } @@ -481,7 +493,7 @@ void ValueEnumerator::incorporateFunction(const Function &F) { if (N->isFunctionLocal() && N->getFunction()) FnLocalMDVector.push_back(N); } - + if (!I->getType()->isVoidTy()) EnumerateValue(I); } diff --git a/lib/Bitcode/Writer/ValueEnumerator.h b/lib/Bitcode/Writer/ValueEnumerator.h index 75468e6c5e2e..0af6164c944f 100644 --- a/lib/Bitcode/Writer/ValueEnumerator.h +++ b/lib/Bitcode/Writer/ValueEnumerator.h @@ -16,7 +16,7 @@ #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" -#include "llvm/Attributes.h" +#include "llvm/IR/Attributes.h" #include namespace llvm { @@ -29,7 +29,7 @@ class Function; class Module; class MDNode; class NamedMDNode; -class AttrListPtr; +class AttributeSet; class ValueSymbolTable; class MDSymbolTable; class raw_ostream; @@ -51,15 +51,19 @@ private: ValueList MDValues; SmallVector FunctionLocalMDs; ValueMapType MDValueMap; - - typedef DenseMap AttributeMapType; + + typedef DenseMap AttributeGroupMapType; + AttributeGroupMapType AttributeGroupMap; + std::vector AttributeGroups; + + typedef DenseMap AttributeMapType; AttributeMapType AttributeMap; - std::vector Attributes; - + std::vector Attribute; + /// GlobalBasicBlockIDs - This map memoizes the basic block ID's referenced by /// the "getGlobalBasicBlockID" method. mutable DenseMap GlobalBasicBlockIDs; - + typedef DenseMap InstructionMapType; InstructionMapType InstructionMap; unsigned InstructionCount; @@ -67,7 +71,7 @@ private: /// BasicBlocks - This contains all the basic blocks for the currently /// incorporated function. Their reverse mapping is stored in ValueMap. std::vector BasicBlocks; - + /// When a function is incorporated, this is the size of the Values list /// before incorporation. unsigned NumModuleValues; @@ -98,33 +102,43 @@ public: unsigned getInstructionID(const Instruction *I) const; void setInstructionID(const Instruction *I); - unsigned getAttributeID(const AttrListPtr &PAL) const { + unsigned getAttributeID(AttributeSet PAL) const { if (PAL.isEmpty()) return 0; // Null maps to zero. - AttributeMapType::const_iterator I = AttributeMap.find(PAL.getRawPointer()); + AttributeMapType::const_iterator I = AttributeMap.find(PAL); assert(I != AttributeMap.end() && "Attribute not in ValueEnumerator!"); return I->second; } + unsigned getAttributeGroupID(AttributeSet PAL) const { + if (PAL.isEmpty()) return 0; // Null maps to zero. + AttributeGroupMapType::const_iterator I = AttributeGroupMap.find(PAL); + assert(I != AttributeGroupMap.end() && "Attribute not in ValueEnumerator!"); + return I->second; + } + /// getFunctionConstantRange - Return the range of values that corresponds to /// function-local constants. 
void getFunctionConstantRange(unsigned &Start, unsigned &End) const { Start = FirstFuncConstantID; End = FirstInstID; } - + const ValueList &getValues() const { return Values; } const ValueList &getMDValues() const { return MDValues; } - const SmallVector &getFunctionLocalMDValues() const { + const SmallVector &getFunctionLocalMDValues() const { return FunctionLocalMDs; } const TypeList &getTypes() const { return Types; } const std::vector &getBasicBlocks() const { - return BasicBlocks; + return BasicBlocks; + } + const std::vector &getAttributes() const { + return Attribute; } - const std::vector &getAttributes() const { - return Attributes; + const std::vector &getAttributeGroups() const { + return AttributeGroups; } - + /// getGlobalBasicBlockID - This returns the function-specific ID for the /// specified basic block. This is relatively expensive information, so it /// should only be used by rare constructs such as address-of-label. @@ -138,7 +152,7 @@ public: private: void OptimizeConstants(unsigned CstStart, unsigned CstEnd); - + void EnumerateMDNodeOperands(const MDNode *N); void EnumerateMetadata(const Value *MD); void EnumerateFunctionLocalMetadata(const MDNode *N); @@ -146,8 +160,8 @@ private: void EnumerateValue(const Value *V); void EnumerateType(Type *T); void EnumerateOperandType(const Value *V); - void EnumerateAttributes(const AttrListPtr &PAL); - + void EnumerateAttributes(AttributeSet PAL); + void EnumerateValueSymbolTable(const ValueSymbolTable &ST); void EnumerateNamedMetadata(const Module *M); }; diff --git a/lib/CMakeLists.txt b/lib/CMakeLists.txt index fb63c63f327c..76ebe9aca9a3 100644 --- a/lib/CMakeLists.txt +++ b/lib/CMakeLists.txt @@ -1,6 +1,7 @@ # `Support' and `TableGen' libraries are added on the top-level CMakeLists.txt -add_subdirectory(VMCore) +add_subdirectory(IR) +add_subdirectory(IRReader) add_subdirectory(CodeGen) add_subdirectory(Bitcode) add_subdirectory(Transforms) @@ -8,6 +9,7 @@ add_subdirectory(Linker) add_subdirectory(Analysis) add_subdirectory(MC) add_subdirectory(Object) +add_subdirectory(Option) add_subdirectory(DebugInfo) add_subdirectory(ExecutionEngine) add_subdirectory(Target) diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.cpp b/lib/CodeGen/AggressiveAntiDepBreaker.cpp index 7a1c049d522d..c50f8b5a42ad 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.cpp +++ b/lib/CodeGen/AggressiveAntiDepBreaker.cpp @@ -20,14 +20,13 @@ #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; // If DebugDiv > 0 then only break antidep with (ID % DebugDiv) == DebugMod @@ -152,23 +151,7 @@ void AggressiveAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { std::vector &KillIndices = State->GetKillIndices(); std::vector &DefIndices = State->GetDefIndices(); - // Determine the live-out physregs for this block. - if (IsReturnBlock) { - // In a return block, examine the function live-out regs. 
- for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { - unsigned Reg = *AI; - State->UnionGroups(Reg, 0); - KillIndices[Reg] = BB->size(); - DefIndices[Reg] = ~0u; - } - } - } - - // In a non-return block, examine the live-in regs of all successors. - // Note a return block can have successors if the return instruction is - // predicated. + // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), @@ -616,7 +599,7 @@ bool AggressiveAntiDepBreaker::FindSuitableFreeRegisters( const TargetRegisterClass *SuperRC = TRI->getMinimalPhysRegClass(SuperReg, MVT::Other); - ArrayRef Order = RegClassInfo.getOrder(SuperRC); + ArrayRef Order = RegClassInfo.getOrder(SuperRC); if (Order.empty()) { DEBUG(dbgs() << "\tEmpty Super Regclass!!\n"); return false; diff --git a/lib/CodeGen/AggressiveAntiDepBreaker.h b/lib/CodeGen/AggressiveAntiDepBreaker.h index 706778485429..6683630fba6d 100644 --- a/lib/CodeGen/AggressiveAntiDepBreaker.h +++ b/lib/CodeGen/AggressiveAntiDepBreaker.h @@ -18,15 +18,15 @@ #define LLVM_CODEGEN_AGGRESSIVEANTIDEPBREAKER_H #include "AntiDepBreaker.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include namespace llvm { diff --git a/lib/CodeGen/AllocationOrder.cpp b/lib/CodeGen/AllocationOrder.cpp index 7cde136c5ef3..3fa1f8ff206c 100644 --- a/lib/CodeGen/AllocationOrder.cpp +++ b/lib/CodeGen/AllocationOrder.cpp @@ -14,10 +14,14 @@ // //===----------------------------------------------------------------------===// +#define DEBUG_TYPE "regalloc" #include "AllocationOrder.h" -#include "VirtRegMap.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -25,56 +29,24 @@ using namespace llvm; AllocationOrder::AllocationOrder(unsigned VirtReg, const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo) - : Begin(0), End(0), Pos(0), RCI(RegClassInfo), OwnedBegin(false) { - const TargetRegisterClass *RC = VRM.getRegInfo().getRegClass(VirtReg); - std::pair HintPair = - VRM.getRegInfo().getRegAllocationHint(VirtReg); - const MachineRegisterInfo &MRI = VRM.getRegInfo(); - - // HintPair.second is a register, phys or virt. - Hint = HintPair.second; - - // Translate to physreg, or 0 if not assigned yet. - if (TargetRegisterInfo::isVirtualRegister(Hint)) - Hint = VRM.getPhys(Hint); - - // The first hint pair component indicates a target-specific hint. - if (HintPair.first) { - const TargetRegisterInfo &TRI = VRM.getTargetRegInfo(); - // The remaining allocation order may depend on the hint. - ArrayRef Order = - TRI.getRawAllocationOrder(RC, HintPair.first, Hint, - VRM.getMachineFunction()); - if (Order.empty()) - return; - - // Copy the allocation order with reserved registers removed. 
- OwnedBegin = true; - unsigned *P = new unsigned[Order.size()]; - Begin = P; - for (unsigned i = 0; i != Order.size(); ++i) - if (!MRI.isReserved(Order[i])) - *P++ = Order[i]; - End = P; - - // Target-dependent hints require resolution. - Hint = TRI.ResolveRegAllocHint(HintPair.first, Hint, - VRM.getMachineFunction()); - } else { - // If there is no hint or just a normal hint, use the cached allocation - // order from RegisterClassInfo. - ArrayRef O = RCI.getOrder(RC); - Begin = O.begin(); - End = O.end(); - } - - // The hint must be a valid physreg for allocation. - if (Hint && (!TargetRegisterInfo::isPhysicalRegister(Hint) || - !RC->contains(Hint) || MRI.isReserved(Hint))) - Hint = 0; -} - -AllocationOrder::~AllocationOrder() { - if (OwnedBegin) - delete [] Begin; + : Pos(0) { + const MachineFunction &MF = VRM.getMachineFunction(); + const TargetRegisterInfo *TRI = &VRM.getTargetRegInfo(); + Order = RegClassInfo.getOrder(MF.getRegInfo().getRegClass(VirtReg)); + TRI->getRegAllocationHints(VirtReg, Order, Hints, MF, &VRM); + rewind(); + + DEBUG({ + if (!Hints.empty()) { + dbgs() << "hints:"; + for (unsigned I = 0, E = Hints.size(); I != E; ++I) + dbgs() << ' ' << PrintReg(Hints[I], TRI); + dbgs() << '\n'; + } + }); +#ifndef NDEBUG + for (unsigned I = 0, E = Hints.size(); I != E; ++I) + assert(std::find(Order.begin(), Order.end(), Hints[I]) != Order.end() && + "Target hint is outside allocation order."); +#endif } diff --git a/lib/CodeGen/AllocationOrder.h b/lib/CodeGen/AllocationOrder.h index 0ce7e0c3b5f6..aed461a7ed02 100644 --- a/lib/CodeGen/AllocationOrder.h +++ b/lib/CodeGen/AllocationOrder.h @@ -17,21 +17,21 @@ #ifndef LLVM_CODEGEN_ALLOCATIONORDER_H #define LLVM_CODEGEN_ALLOCATIONORDER_H +#include "llvm/ADT/ArrayRef.h" +#include "llvm/MC/MCRegisterInfo.h" + namespace llvm { class RegisterClassInfo; class VirtRegMap; class AllocationOrder { - const unsigned *Begin; - const unsigned *End; - const unsigned *Pos; - const RegisterClassInfo &RCI; - unsigned Hint; - bool OwnedBegin; -public: + SmallVector Hints; + ArrayRef Order; + int Pos; - /// AllocationOrder - Create a new AllocationOrder for VirtReg. +public: + /// Create a new AllocationOrder for VirtReg. /// @param VirtReg Virtual register to allocate for. /// @param VRM Virtual register map for function. /// @param RegClassInfo Information about reserved and allocatable registers. @@ -39,32 +39,45 @@ public: const VirtRegMap &VRM, const RegisterClassInfo &RegClassInfo); - ~AllocationOrder(); + /// Get the allocation order without reordered hints. + ArrayRef getOrder() const { return Order; } - /// next - Return the next physical register in the allocation order, or 0. - /// It is safe to call next again after it returned 0. - /// It will keep returning 0 until rewind() is called. + /// Return the next physical register in the allocation order, or 0. + /// It is safe to call next() again after it returned 0, it will keep + /// returning 0 until rewind() is called. unsigned next() { - // First take the hint. - if (!Pos) { - Pos = Begin; - if (Hint) - return Hint; - } - // Then look at the order from TRI. - while (Pos != End) { - unsigned Reg = *Pos++; - if (Reg != Hint) + if (Pos < 0) + return Hints.end()[Pos++]; + while (Pos < int(Order.size())) { + unsigned Reg = Order[Pos++]; + if (!isHint(Reg)) return Reg; } return 0; } - /// rewind - Start over from the beginning. 
- void rewind() { Pos = 0; } + /// As next(), but allow duplicates to be returned, and stop before the + /// Limit'th register in the RegisterClassInfo allocation order. + /// + /// This can produce more than Limit registers if there are hints. + unsigned nextWithDups(unsigned Limit) { + if (Pos < 0) + return Hints.end()[Pos++]; + if (Pos < int(Limit)) + return Order[Pos++]; + return 0; + } + + /// Start over from the beginning. + void rewind() { Pos = -int(Hints.size()); } - /// isHint - Return true if PhysReg is a preferred register. - bool isHint(unsigned PhysReg) const { return PhysReg == Hint; } + /// Return true if the last register returned from next() was a preferred register. + bool isHint() const { return Pos <= 0; } + + /// Return true if PhysReg is a preferred register. + bool isHint(unsigned PhysReg) const { + return std::find(Hints.begin(), Hints.end(), PhysReg) != Hints.end(); + } }; } // end namespace llvm diff --git a/lib/CodeGen/Analysis.cpp b/lib/CodeGen/Analysis.cpp index 5162ad762e73..dd7282c0ad97 100644 --- a/lib/CodeGen/Analysis.cpp +++ b/lib/CodeGen/Analysis.cpp @@ -13,19 +13,17 @@ #include "llvm/CodeGen/Analysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLowering.h" using namespace llvm; /// ComputeLinearIndex - Given an LLVM IR aggregate type and a sequence @@ -266,8 +264,7 @@ static const Value *getNoopInput(const Value *V, const TargetLowering &TLI) { /// between it and the return. /// /// This function only tests target-independent requirements. -bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, - const TargetLowering &TLI) { +bool llvm::isInTailCallPosition(ImmutableCallSite CS,const TargetLowering &TLI){ const Instruction *I = CS.getInstruction(); const BasicBlock *ExitBB = I->getParent(); const TerminatorInst *Term = ExitBB->getTerminator(); @@ -313,14 +310,16 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, // Conservatively require the attributes of the call to match those of // the return. Ignore noalias because it doesn't affect the call sequence. const Function *F = ExitBB->getParent(); - Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (AttrBuilder(CalleeRetAttr).removeAttribute(Attributes::NoAlias) != - AttrBuilder(CallerRetAttr).removeAttribute(Attributes::NoAlias)) + AttributeSet CallerAttrs = F->getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias) != + AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex). + removeAttribute(Attribute::NoAlias)) return false; // It's not safe to eliminate the sign / zero extension of the return value. 
- if (CallerRetAttr.hasAttribute(Attributes::ZExt) || - CallerRetAttr.hasAttribute(Attributes::SExt)) + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) return false; // Otherwise, make sure the unmodified return value of I is the return value. @@ -348,23 +347,3 @@ bool llvm::isInTailCallPosition(ImmutableCallSite CS, Attributes CalleeRetAttr, return true; } - -bool llvm::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, - SDValue &Chain, const TargetLowering &TLI) { - const Function *F = DAG.getMachineFunction().getFunction(); - - // Conservatively require the attributes of the call to match those of - // the return. Ignore noalias because it doesn't affect the call sequence. - Attributes CallerRetAttr = F->getAttributes().getRetAttributes(); - if (AttrBuilder(CallerRetAttr) - .removeAttribute(Attributes::NoAlias).hasAttributes()) - return false; - - // It's not safe to eliminate the sign / zero extension of the return value. - if (CallerRetAttr.hasAttribute(Attributes::ZExt) || - CallerRetAttr.hasAttribute(Attributes::SExt)) - return false; - - // Check if the only use is a function return node. - return TLI.isUsedByReturnOnly(Node, Chain); -} diff --git a/lib/CodeGen/AsmPrinter/ARMException.cpp b/lib/CodeGen/AsmPrinter/ARMException.cpp index b2ebf04e518f..188047d94f48 100644 --- a/lib/CodeGen/AsmPrinter/ARMException.cpp +++ b/lib/CodeGen/AsmPrinter/ARMException.cpp @@ -12,32 +12,31 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" using namespace llvm; -cl::opt +static cl::opt EnableARMEHABIDescriptors("arm-enable-ehabi-descriptors", cl::Hidden, cl::desc("Generate ARM EHABI tables with unwinding descriptors"), cl::init(false)); @@ -69,24 +68,69 @@ void ARMException::EndFunction() { Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_end", Asm->getFunctionNumber())); - // Emit references to personality. - if (const Function * Personality = - MMI->getPersonalities()[MMI->getPersonalityIndex()]) { - MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); - Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); - Asm->OutStreamer.EmitPersonality(PerSym); - } - if (EnableARMEHABIDescriptors) { // Map all labels and get rid of any dead landing pads. 
MMI->TidyLandingPads(); - Asm->OutStreamer.EmitHandlerData(); + if (!MMI->getLandingPads().empty()) { + // Emit references to personality. + if (const Function * Personality = + MMI->getPersonalities()[MMI->getPersonalityIndex()]) { + MCSymbol *PerSym = Asm->Mang->getSymbol(Personality); + Asm->OutStreamer.EmitSymbolAttribute(PerSym, MCSA_Global); + Asm->OutStreamer.EmitPersonality(PerSym); + } + + // Emit .handlerdata directive. + Asm->OutStreamer.EmitHandlerData(); - // Emit actual exception table - EmitExceptionTable(); + // Emit actual exception table + EmitExceptionTable(); + } } } Asm->OutStreamer.EmitFnEnd(); } + +void ARMException::EmitTypeInfos(unsigned TTypeEncoding) { + const std::vector<const GlobalVariable *> &TypeInfos = MMI->getTypeInfos(); + const std::vector<unsigned> &FilterIds = MMI->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + + int Entry = 0; + // Emit the Catch TypeInfos. + if (VerboseAsm && !TypeInfos.empty()) { + Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); + Asm->OutStreamer.AddBlankLine(); + Entry = TypeInfos.size(); + } + + for (std::vector<const GlobalVariable *>::const_reverse_iterator + I = TypeInfos.rbegin(), E = TypeInfos.rend(); I != E; ++I) { + const GlobalVariable *GV = *I; + if (VerboseAsm) + Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); + Asm->EmitTTypeReference(GV, TTypeEncoding); + } + + // Emit the Exception Specifications. + if (VerboseAsm && !FilterIds.empty()) { + Asm->OutStreamer.AddComment(">> Filter TypeInfos <<"); + Asm->OutStreamer.AddBlankLine(); + Entry = 0; + } + for (std::vector<unsigned>::const_iterator + I = FilterIds.begin(), E = FilterIds.end(); I < E; ++I) { + unsigned TypeID = *I; + if (VerboseAsm) { + --Entry; + if (TypeID != 0) + Asm->OutStreamer.AddComment("FilterInfo " + Twine(Entry)); + } + + Asm->EmitTTypeReference((TypeID == 0 ?
0 : TypeInfos[TypeID - 1]), + TTypeEncoding); + } +} diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index d74a70362a2a..d4a745d985e8 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -15,8 +15,10 @@ #include "llvm/CodeGen/AsmPrinter.h" #include "DwarfDebug.h" #include "DwarfException.h" -#include "llvm/DebugInfo.h" -#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/GCMetadataPrinter.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -24,7 +26,10 @@ #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Analysis/ConstantFolding.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" @@ -32,20 +37,16 @@ #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/Timer.h" #include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Format.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/Timer.h" using namespace llvm; static const char *DWARFGroupName = "DWARF Emission"; @@ -90,9 +91,6 @@ static unsigned getGVAlignmentLog2(const GlobalValue *GV, const DataLayout &TD, return NumBits; } - - - AsmPrinter::AsmPrinter(TargetMachine &tm, MCStreamer &Streamer) : MachineFunctionPass(ID), TM(tm), MAI(tm.getMCAsmInfo()), @@ -130,7 +128,6 @@ const TargetLoweringObjectFile &AsmPrinter::getObjFileLowering() const { return TM.getTargetLowering()->getObjFileLowering(); } - /// getDataLayout - Return information about data layout. const DataLayout &AsmPrinter::getDataLayout() const { return *TM.getDataLayout(); @@ -153,6 +150,8 @@ void AsmPrinter::getAnalysisUsage(AnalysisUsage &AU) const { } bool AsmPrinter::doInitialization(Module &M) { + OutStreamer.InitStreamer(); + MMI = getAnalysisIfAvailable(); MMI->AnalyzeModule(M); @@ -312,8 +311,13 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { return; } - if (Align == 1 || - MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { + // Use .lcomm only if it supports user-specified alignment. + // Otherwise, while it would still be correct to use .lcomm in some + // cases (e.g. when Align == 1), the external assembler might enfore + // some -unknown- default alignment behavior, which could cause + // spurious differences between external and integrated assembler. + // Prefer to simply fall back to .local / .comm in this case. 
+ if (MAI->getLCOMMDirectiveAlignmentType() != LCOMM::NoAlignment) { // .lcomm _foo, 42 OutStreamer.EmitLocalCommonSymbol(GVSym, Size, Align); return; @@ -387,9 +391,9 @@ void AsmPrinter::EmitGlobalVariable(const GlobalVariable *GV) { // - pointer to mangled symbol above with initializer unsigned PtrSize = TD->getPointerSizeInBits()/8; OutStreamer.EmitSymbolValue(GetExternalSymbolSymbol("_tlv_bootstrap"), - PtrSize, 0); - OutStreamer.EmitIntValue(0, PtrSize, 0); - OutStreamer.EmitSymbolValue(MangSym, PtrSize, 0); + PtrSize); + OutStreamer.EmitIntValue(0, PtrSize); + OutStreamer.EmitSymbolValue(MangSym, PtrSize); OutStreamer.AddBlankLine(); return; @@ -943,6 +947,8 @@ bool AsmPrinter::doFinalization(Module &M) { MMI = 0; OutStreamer.Finish(); + OutStreamer.reset(); + return false; } @@ -1034,7 +1040,7 @@ void AsmPrinter::EmitConstantPool() { // Emit inter-object padding for alignment. unsigned AlignMask = CPE.getAlignment() - 1; unsigned NewOffset = (Offset + AlignMask) & ~AlignMask; - OutStreamer.EmitFill(NewOffset - Offset, 0/*fillval*/, 0/*addrspace*/); + OutStreamer.EmitZeros(NewOffset - Offset); Type *Ty = CPE.getType(); Offset = NewOffset + TM.getDataLayout()->getTypeAllocSize(Ty); @@ -1197,7 +1203,7 @@ void AsmPrinter::EmitJumpTableEntry(const MachineJumpTableInfo *MJTI, assert(Value && "Unknown entry kind!"); unsigned EntrySize = MJTI->getEntrySize(*TM.getDataLayout()); - OutStreamer.EmitValue(Value, EntrySize, /*addrspace*/0); + OutStreamer.EmitValue(Value, EntrySize); } @@ -1320,19 +1326,19 @@ void AsmPrinter::EmitXXStructorList(const Constant *List, bool isCtor) { /// EmitInt8 - Emit a byte directive and value. /// void AsmPrinter::EmitInt8(int Value) const { - OutStreamer.EmitIntValue(Value, 1, 0/*addrspace*/); + OutStreamer.EmitIntValue(Value, 1); } /// EmitInt16 - Emit a short directive and value. /// void AsmPrinter::EmitInt16(int Value) const { - OutStreamer.EmitIntValue(Value, 2, 0/*addrspace*/); + OutStreamer.EmitIntValue(Value, 2); } /// EmitInt32 - Emit a long directive and value. /// void AsmPrinter::EmitInt32(int Value) const { - OutStreamer.EmitIntValue(Value, 4, 0/*addrspace*/); + OutStreamer.EmitIntValue(Value, 4); } /// EmitLabelDifference - Emit something like ".long Hi-Lo" where the size @@ -1347,14 +1353,14 @@ void AsmPrinter::EmitLabelDifference(const MCSymbol *Hi, const MCSymbol *Lo, OutContext); if (!MAI->hasSetDirective()) { - OutStreamer.EmitValue(Diff, Size, 0/*AddrSpace*/); + OutStreamer.EmitValue(Diff, Size); return; } // Otherwise, emit with .set (aka assignment). MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, Size, 0/*AddrSpace*/); + OutStreamer.EmitSymbolValue(SetLabel, Size); } /// EmitLabelOffsetDifference - Emit something like ".long Hi+Offset-Lo" @@ -1378,12 +1384,12 @@ void AsmPrinter::EmitLabelOffsetDifference(const MCSymbol *Hi, uint64_t Offset, OutContext); if (!MAI->hasSetDirective()) - OutStreamer.EmitValue(Diff, 4, 0/*AddrSpace*/); + OutStreamer.EmitValue(Diff, 4); else { // Otherwise, emit with .set (aka assignment). 
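The constant-pool hunk above pads between entries by rounding the running offset up to each entry's alignment with (Offset + AlignMask) & ~AlignMask and emitting that many zero bytes. A small self-contained sketch of the same arithmetic, assuming a power-of-two alignment:

#include <cassert>
#include <cstdint>
#include <iostream>

// Illustrative helper: round Offset up to the next multiple of Align
// (Align must be a power of two), the arithmetic used for inter-entry
// constant-pool padding.
static uint64_t alignTo(uint64_t Offset, uint64_t Align) {
  assert(Align && (Align & (Align - 1)) == 0 && "power-of-two alignment");
  uint64_t Mask = Align - 1;
  return (Offset + Mask) & ~Mask;
}

int main() {
  uint64_t Offset = 10;
  uint64_t Aligned = alignTo(Offset, 8);
  std::cout << "pad " << (Aligned - Offset) << " zero byte(s), new offset "
            << Aligned << "\n";   // pad 6 zero byte(s), new offset 16
}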
MCSymbol *SetLabel = GetTempSymbol("set", SetCounter++); OutStreamer.EmitAssignment(SetLabel, Diff); - OutStreamer.EmitSymbolValue(SetLabel, 4, 0/*AddrSpace*/); + OutStreamer.EmitSymbolValue(SetLabel, 4); } } @@ -1401,7 +1407,7 @@ void AsmPrinter::EmitLabelPlusOffset(const MCSymbol *Label, uint64_t Offset, MCConstantExpr::Create(Offset, OutContext), OutContext); - OutStreamer.EmitValue(Expr, Size, 0/*AddrSpace*/); + OutStreamer.EmitValue(Expr, Size); } @@ -1472,19 +1478,14 @@ static const MCExpr *lowerConstant(const Constant *CV, AsmPrinter &AP) { case Instruction::GetElementPtr: { const DataLayout &TD = *AP.TM.getDataLayout(); // Generate a symbolic expression for the byte address - const Constant *PtrVal = CE->getOperand(0); - SmallVector IdxVec(CE->op_begin()+1, CE->op_end()); - int64_t Offset = TD.getIndexedOffset(PtrVal->getType(), IdxVec); + APInt OffsetAI(TD.getPointerSizeInBits(), 0); + cast(CE)->accumulateConstantOffset(TD, OffsetAI); const MCExpr *Base = lowerConstant(CE->getOperand(0), AP); - if (Offset == 0) + if (!OffsetAI) return Base; - // Truncate/sext the offset to the pointer size. - unsigned Width = TD.getPointerSizeInBits(); - if (Width < 64) - Offset = SignExtend64(Offset, Width); - + int64_t Offset = OffsetAI.getSExtValue(); return MCBinaryExpr::CreateAdd(Base, MCConstantExpr::Create(Offset, Ctx), Ctx); } @@ -1614,7 +1615,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { } return Byte; } - + if (const ConstantDataSequential *CDS = dyn_cast(V)) return isRepeatedByteSequence(CDS); @@ -1623,7 +1624,7 @@ static int isRepeatedByteSequence(const Value *V, TargetMachine &TM) { static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, unsigned AddrSpace,AsmPrinter &AP){ - + // See if we can aggregate this into a .fill, if so, emit it as such. int Value = isRepeatedByteSequence(CDS, AP.TM); if (Value != -1) { @@ -1632,7 +1633,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, if (Bytes > 1) return AP.OutStreamer.EmitFill(Bytes, Value, AddrSpace); } - + // If this can be emitted with .ascii/.asciz, emit it as such. 
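The GetElementPtr lowering above now obtains a pointer-width signed byte offset from accumulateConstantOffset and getSExtValue instead of calling SignExtend64 by hand. A standalone sketch of the sign-extension step that the removed lines performed, assuming Bits is between 1 and 64:

#include <cstdint>
#include <iostream>

// Illustrative equivalent of the removed SignExtend64(Offset, Width) step:
// treat the low `Bits` bits of X as a signed value of that width.
static int64_t signExtend64(uint64_t X, unsigned Bits) {
  return static_cast<int64_t>(X << (64 - Bits)) >> (64 - Bits);
}

int main() {
  // On a 32-bit pointer target a byte offset of "-4" shows up as the raw
  // value 0xFFFFFFFC; sign extension recovers -4 for the MCExpr constant.
  std::cout << signExtend64(0xFFFFFFFCull, 32) << "\n";  // prints -4
}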
if (CDS->isString()) return AP.OutStreamer.EmitBytes(CDS->getAsString(), AddrSpace); @@ -1656,7 +1657,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, float F; uint32_t I; }; - + F = CDS->getElementAsFloat(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "float " << F << '\n'; @@ -1669,7 +1670,7 @@ static void emitGlobalConstantDataSequential(const ConstantDataSequential *CDS, double F; uint64_t I; }; - + F = CDS->getElementAsDouble(i); if (AP.isVerbose()) AP.OutStreamer.GetCommentOS() << "double " << F << '\n'; @@ -1745,87 +1746,48 @@ static void emitGlobalConstantStruct(const ConstantStruct *CS, static void emitGlobalConstantFP(const ConstantFP *CFP, unsigned AddrSpace, AsmPrinter &AP) { - if (CFP->getType()->isHalfTy()) { - if (AP.isVerbose()) { - SmallString<10> Str; - CFP->getValueAPF().toString(Str); - AP.OutStreamer.GetCommentOS() << "half " << Str << '\n'; - } - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 2, AddrSpace); - return; - } - - if (CFP->getType()->isFloatTy()) { - if (AP.isVerbose()) { - float Val = CFP->getValueAPF().convertToFloat(); - uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.GetCommentOS() << "float " << Val << '\n' - << " (" << format("0x%x", IntVal) << ")\n"; - } - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 4, AddrSpace); - return; - } + APInt API = CFP->getValueAPF().bitcastToAPInt(); - // FP Constants are printed as integer constants to avoid losing - // precision. - if (CFP->getType()->isDoubleTy()) { - if (AP.isVerbose()) { - double Val = CFP->getValueAPF().convertToDouble(); - uint64_t IntVal = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.GetCommentOS() << "double " << Val << '\n' - << " (" << format("0x%lx", IntVal) << ")\n"; - } + // First print a comment with what we think the original floating-point value + // should have been. + if (AP.isVerbose()) { + SmallString<8> StrVal; + CFP->getValueAPF().toString(StrVal); - uint64_t Val = CFP->getValueAPF().bitcastToAPInt().getZExtValue(); - AP.OutStreamer.EmitIntValue(Val, 8, AddrSpace); - return; + CFP->getType()->print(AP.OutStreamer.GetCommentOS()); + AP.OutStreamer.GetCommentOS() << ' ' << StrVal << '\n'; } - if (CFP->getType()->isX86_FP80Ty()) { - // all long double variants are printed as hex - // API needed to prevent premature destruction - APInt API = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t *p = API.getRawData(); - if (AP.isVerbose()) { - // Convert to double so we can print the approximate val as a comment. - APFloat DoubleVal = CFP->getValueAPF(); - bool ignored; - DoubleVal.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, - &ignored); - AP.OutStreamer.GetCommentOS() << "x86_fp80 ~= " - << DoubleVal.convertToDouble() << '\n'; - } + // Now iterate through the APInt chunks, emitting them in endian-correct + // order, possibly with a smaller chunk at beginning/end (e.g. for x87 80-bit + // floats). 
+ unsigned NumBytes = API.getBitWidth() / 8; + unsigned TrailingBytes = NumBytes % sizeof(uint64_t); + const uint64_t *p = API.getRawData(); - if (AP.TM.getDataLayout()->isBigEndian()) { - AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); - } else { - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); - AP.OutStreamer.EmitIntValue(p[1], 2, AddrSpace); - } + // PPC's long double has odd notions of endianness compared to how LLVM + // handles it: p[0] goes first for *big* endian on PPC. + if (AP.TM.getDataLayout()->isBigEndian() != CFP->getType()->isPPC_FP128Ty()) { + int Chunk = API.getNumWords() - 1; - // Emit the tail padding for the long double. - const DataLayout &TD = *AP.TM.getDataLayout(); - AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - - TD.getTypeStoreSize(CFP->getType()), AddrSpace); - return; - } + if (TrailingBytes) + AP.OutStreamer.EmitIntValue(p[Chunk--], TrailingBytes, AddrSpace); - assert(CFP->getType()->isPPC_FP128Ty() && - "Floating point constant type not handled"); - // All long double variants are printed as hex - // API needed to prevent premature destruction. - APInt API = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t *p = API.getRawData(); - if (AP.TM.getDataLayout()->isBigEndian()) { - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); - AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); + for (; Chunk >= 0; --Chunk) + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); } else { - AP.OutStreamer.EmitIntValue(p[1], 8, AddrSpace); - AP.OutStreamer.EmitIntValue(p[0], 8, AddrSpace); + unsigned Chunk; + for (Chunk = 0; Chunk < NumBytes / sizeof(uint64_t); ++Chunk) + AP.OutStreamer.EmitIntValue(p[Chunk], sizeof(uint64_t), AddrSpace); + + if (TrailingBytes) + AP.OutStreamer.EmitIntValue(p[Chunk], TrailingBytes, AddrSpace); } + + // Emit the tail padding for the long double. + const DataLayout &TD = *AP.TM.getDataLayout(); + AP.OutStreamer.EmitZeros(TD.getTypeAllocSize(CFP->getType()) - + TD.getTypeStoreSize(CFP->getType()), AddrSpace); } static void emitGlobalConstantLargeInt(const ConstantInt *CI, @@ -1878,7 +1840,7 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, if (const ConstantDataSequential *CDS = dyn_cast(CV)) return emitGlobalConstantDataSequential(CDS, AddrSpace, AP); - + if (const ConstantArray *CVA = dyn_cast(CV)) return emitGlobalConstantArray(CVA, AddrSpace, AP); @@ -1900,10 +1862,10 @@ static void emitGlobalConstantImpl(const Constant *CV, unsigned AddrSpace, return emitGlobalConstantImpl(New, AddrSpace, AP); } } - + if (const ConstantVector *V = dyn_cast(CV)) return emitGlobalConstantVector(V, AddrSpace, AP); - + // Otherwise, it must be a ConstantExpr. Lower it to an MCExpr, then emit it // thread the streamer with EmitValue. 
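The rewritten floating-point constant emission above walks the value's raw 64-bit words and emits whole 8-byte chunks plus one smaller trailing chunk (for example the extra 2 bytes of an x87 80-bit float) in target byte order. A self-contained sketch of that chunking; the PPC long-double special case is left out and emit() just prints instead of streaming:

#include <cstdint>
#include <cstdio>
#include <vector>

// Words holds the constant as a little-endian array of 64-bit words, like
// APInt::getRawData(); BitWidth is the constant's width in bits.
static void emitChunks(const std::vector<uint64_t> &Words, unsigned BitWidth,
                       bool BigEndian) {
  unsigned NumBytes = BitWidth / 8;
  unsigned Trailing = NumBytes % 8;          // bytes in the partial word
  auto emit = [](uint64_t V, unsigned Size) {
    std::printf("emit %u byte(s): 0x%016llx\n", Size,
                static_cast<unsigned long long>(V));
  };
  if (BigEndian) {
    int Chunk = static_cast<int>(Words.size()) - 1;
    if (Trailing)
      emit(Words[Chunk--], Trailing);        // most-significant, partial word
    for (; Chunk >= 0; --Chunk)
      emit(Words[Chunk], 8);
  } else {
    unsigned Chunk = 0;
    for (; Chunk < NumBytes / 8; ++Chunk)
      emit(Words[Chunk], 8);
    if (Trailing)
      emit(Words[Chunk], Trailing);          // most-significant, partial word
  }
}

int main() {
  // An 80-bit value: low 64 bits first, then the high 16 bits.
  emitChunks({0x8000000000000000ULL, 0x4000ULL}, 80, /*BigEndian=*/false);
}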
AP.OutStreamer.EmitValue(lowerConstant(CV, AP), Size, AddrSpace); diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp index d94e1fe61bf7..156acace553d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterDwarf.cpp @@ -13,19 +13,19 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/MC/MachineLocation.h" +#include "llvm/ADT/Twine.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/DataLayout.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/Twine.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ErrorHandling.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -46,19 +46,19 @@ void AsmPrinter::EmitULEB128(unsigned Value, const char *Desc, if (isVerbose() && Desc) OutStreamer.AddComment(Desc); - OutStreamer.EmitULEB128IntValue(Value, 0/*addrspace*/, PadTo); + OutStreamer.EmitULEB128IntValue(Value, PadTo); } /// EmitCFAByte - Emit a .byte 42 directive for a DW_CFA_xxx value. void AsmPrinter::EmitCFAByte(unsigned Val) const { if (isVerbose()) { if (Val >= dwarf::DW_CFA_offset && Val < dwarf::DW_CFA_offset+64) - OutStreamer.AddComment("DW_CFA_offset + Reg (" + + OutStreamer.AddComment("DW_CFA_offset + Reg (" + Twine(Val-dwarf::DW_CFA_offset) + ")"); else OutStreamer.AddComment(dwarf::CallFrameString(Val)); } - OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/); + OutStreamer.EmitIntValue(Val, 1); } static const char *DecodeDWARFEncoding(unsigned Encoding) { @@ -83,7 +83,7 @@ static const char *DecodeDWARFEncoding(unsigned Encoding) { case dwarf::DW_EH_PE_indirect | dwarf::DW_EH_PE_pcrel |dwarf::DW_EH_PE_sdata8: return "indirect pcrel sdata8"; } - + return ""; } @@ -101,15 +101,15 @@ void AsmPrinter::EmitEncodingByte(unsigned Val, const char *Desc) const { OutStreamer.AddComment(Twine("Encoding = ") + DecodeDWARFEncoding(Val)); } - - OutStreamer.EmitIntValue(Val, 1, 0/*addrspace*/); + + OutStreamer.EmitIntValue(Val, 1); } /// GetSizeOfEncodedValue - Return the size of the encoding in bytes. 
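EmitULEB128 above forwards to the streamer's ULEB128 emission with an optional pad-to length. A standalone encoder sketch of that encoding, not the MC implementation; the PadTo handling here is one reasonable reading of "pad to at least N bytes":

#include <cstdint>
#include <cstdio>
#include <vector>

// ULEB128: 7 bits per byte, least-significant group first, high bit set on
// every byte except the last; PadTo forces a minimum encoded length.
static std::vector<uint8_t> encodeULEB128(uint64_t Value, unsigned PadTo = 0) {
  std::vector<uint8_t> Out;
  do {
    uint8_t Byte = Value & 0x7f;
    Value >>= 7;
    if (Value != 0 || Out.size() + 1 < PadTo)
      Byte |= 0x80;                          // more bytes follow
    Out.push_back(Byte);
  } while (Value != 0);
  while (Out.size() < PadTo)                 // pad with empty groups
    Out.push_back(Out.size() + 1 < PadTo ? 0x80 : 0x00);
  return Out;
}

int main() {
  for (uint8_t B : encodeULEB128(624485))    // classic DWARF example
    std::printf("0x%02x ", B);               // 0xe5 0x8e 0x26
  std::printf("\n");
}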
unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { if (Encoding == dwarf::DW_EH_PE_omit) return 0; - + switch (Encoding & 0x07) { default: llvm_unreachable("Invalid encoded value."); case dwarf::DW_EH_PE_absptr: return TM.getDataLayout()->getPointerSize(); @@ -119,20 +119,16 @@ unsigned AsmPrinter::GetSizeOfEncodedValue(unsigned Encoding) const { } } -void AsmPrinter::EmitReference(const MCSymbol *Sym, unsigned Encoding) const { - const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - - const MCExpr *Exp = - TLOF.getExprForDwarfReference(Sym, Encoding, OutStreamer); - OutStreamer.EmitAbsValue(Exp, GetSizeOfEncodedValue(Encoding)); -} +void AsmPrinter::EmitTTypeReference(const GlobalValue *GV, + unsigned Encoding) const { + if (GV) { + const TargetLoweringObjectFile &TLOF = getObjFileLowering(); -void AsmPrinter::EmitReference(const GlobalValue *GV, unsigned Encoding)const{ - const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - - const MCExpr *Exp = - TLOF.getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); - OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding), /*addrspace*/0); + const MCExpr *Exp = + TLOF.getTTypeGlobalReference(GV, Mang, MMI, Encoding, OutStreamer); + OutStreamer.EmitValue(Exp, GetSizeOfEncodedValue(Encoding)); + } else + OutStreamer.EmitIntValue(0, GetSizeOfEncodedValue(Encoding)); } /// EmitSectionOffset - Emit the 4-byte offset of Label from the start of its @@ -149,22 +145,22 @@ void AsmPrinter::EmitSectionOffset(const MCSymbol *Label, OutStreamer.EmitCOFFSecRel32(Label); return; } - + // Get the section that we're referring to, based on SectionLabel. const MCSection &Section = SectionLabel->getSection(); - + // If Label has already been emitted, verify that it is in the same section as // section label for sanity. assert((!Label->isInSection() || &Label->getSection() == &Section) && "Section offset using wrong section base for label"); - + // If the section in question will end up with an address of 0 anyway, we can // just emit an absolute reference to save a relocation. if (Section.isBaseAddressKnownZero()) { - OutStreamer.EmitSymbolValue(Label, 4, 0/*AddrSpace*/); + OutStreamer.EmitSymbolValue(Label, 4); return; } - + // Otherwise, emit it as a label difference from the start of the section. 
EmitLabelDifference(Label, SectionLabel, 4); } diff --git a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp index 50f0fc30a07c..abfa330fa29d 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinterInlineAsm.cpp @@ -13,26 +13,26 @@ #define DEBUG_TYPE "asm-printer" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/Constants.h" -#include "llvm/InlineAsm.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" #include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCTargetAsmParser.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Twine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/SourceMgr.h" #include "llvm/Support/TargetRegistry.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; namespace { diff --git a/lib/CodeGen/AsmPrinter/CMakeLists.txt b/lib/CodeGen/AsmPrinter/CMakeLists.txt index 58fe2ed9d357..8d15c069c6f8 100644 --- a/lib/CodeGen/AsmPrinter/CMakeLists.txt +++ b/lib/CodeGen/AsmPrinter/CMakeLists.txt @@ -9,6 +9,7 @@ add_llvm_library(LLVMAsmPrinter DwarfCompileUnit.cpp DwarfDebug.cpp DwarfException.cpp + ErlangGCPrinter.cpp OcamlGCPrinter.cpp Win64Exception.cpp ) diff --git a/lib/CodeGen/AsmPrinter/DIE.cpp b/lib/CodeGen/AsmPrinter/DIE.cpp index 4d73b3c22261..57e0acda890f 100644 --- a/lib/CodeGen/AsmPrinter/DIE.cpp +++ b/lib/CodeGen/AsmPrinter/DIE.cpp @@ -8,16 +8,16 @@ //===----------------------------------------------------------------------===// // // Data structures for DWARF info entries. -// +// //===----------------------------------------------------------------------===// #include "DIE.h" #include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/DataLayout.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -112,6 +112,17 @@ DIE::~DIE() { delete Children[i]; } +/// Climb up the parent chain to get the compile unit DIE this DIE belongs to. 
+DIE *DIE::getCompileUnit() const{ + DIE *p = getParent(); + while (p) { + if (p->getTag() == dwarf::DW_TAG_compile_unit) + return p; + p = p->getParent(); + } + llvm_unreachable("We should not have orphaned DIEs."); +} + #ifndef NDEBUG void DIE::print(raw_ostream &O, unsigned IncIndent) { IndentCount += IncIndent; @@ -133,7 +144,7 @@ void DIE::print(raw_ostream &O, unsigned IncIndent) { O << "Size: " << Size << "\n"; } - const SmallVector &Data = Abbrev.getData(); + const SmallVectorImpl &Data = Abbrev.getData(); IndentCount += 2; for (unsigned i = 0, N = Data.size(); i < N; ++i) { @@ -193,17 +204,20 @@ void DIEInteger::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_data1: Size = 1; break; case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: Size = 2; break; + case dwarf::DW_FORM_sec_offset: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: Size = 4; break; case dwarf::DW_FORM_ref8: // Fall thru case dwarf::DW_FORM_data8: Size = 8; break; + case dwarf::DW_FORM_GNU_str_index: Asm->EmitULEB128(Integer); return; + case dwarf::DW_FORM_GNU_addr_index: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_udata: Asm->EmitULEB128(Integer); return; case dwarf::DW_FORM_sdata: Asm->EmitSLEB128(Integer); return; case dwarf::DW_FORM_addr: Size = Asm->getDataLayout().getPointerSize(); break; default: llvm_unreachable("DIE Value form not supported yet"); } - Asm->OutStreamer.EmitIntValue(Integer, Size, 0/*addrspace*/); + Asm->OutStreamer.EmitIntValue(Integer, Size); } /// SizeOf - Determine size of integer value in bytes. @@ -216,10 +230,13 @@ unsigned DIEInteger::SizeOf(AsmPrinter *AP, unsigned Form) const { case dwarf::DW_FORM_data1: return sizeof(int8_t); case dwarf::DW_FORM_ref2: // Fall thru case dwarf::DW_FORM_data2: return sizeof(int16_t); + case dwarf::DW_FORM_sec_offset: // Fall thru case dwarf::DW_FORM_ref4: // Fall thru case dwarf::DW_FORM_data4: return sizeof(int32_t); case dwarf::DW_FORM_ref8: // Fall thru case dwarf::DW_FORM_data8: return sizeof(int64_t); + case dwarf::DW_FORM_GNU_str_index: return MCAsmInfo::getULEB128Size(Integer); + case dwarf::DW_FORM_GNU_addr_index: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_udata: return MCAsmInfo::getULEB128Size(Integer); case dwarf::DW_FORM_sdata: return MCAsmInfo::getSLEB128Size(Integer); case dwarf::DW_FORM_addr: return AP->getDataLayout().getPointerSize(); @@ -241,13 +258,14 @@ void DIEInteger::print(raw_ostream &O) { /// EmitValue - Emit label value. /// void DIELabel::EmitValue(AsmPrinter *AP, unsigned Form) const { - AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form), 0/*AddrSpace*/); + AP->OutStreamer.EmitSymbolValue(Label, SizeOf(AP, Form)); } /// SizeOf - Determine size of label value in bytes. 
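getCompileUnit above climbs Parent links until it reaches the DW_TAG_compile_unit DIE and asserts if it never finds one. A toy standalone version of the same walk, with an illustrative Node type standing in for DIE:

#include <cassert>
#include <iostream>
#include <string>
#include <vector>

struct Node {
  std::string Tag;
  Node *Parent;
  std::vector<Node *> Children;
  void addChild(Node *C) { C->Parent = this; Children.push_back(C); }
};

// Walk up the parent chain to the enclosing "compile unit" node.
static Node *getCompileUnit(Node *N) {
  for (Node *P = N->Parent; P; P = P->Parent)
    if (P->Tag == "compile_unit")
      return P;
  assert(false && "orphaned node");
  return nullptr;
}

int main() {
  Node CU{"compile_unit", nullptr, {}};
  Node Fn{"subprogram", nullptr, {}};
  Node Var{"variable", nullptr, {}};
  CU.addChild(&Fn);
  Fn.addChild(&Var);
  std::cout << getCompileUnit(&Var)->Tag << "\n";  // compile_unit
}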
/// unsigned DIELabel::SizeOf(AsmPrinter *AP, unsigned Form) const { if (Form == dwarf::DW_FORM_data4) return 4; + if (Form == dwarf::DW_FORM_sec_offset) return 4; if (Form == dwarf::DW_FORM_strp) return 4; return AP->getDataLayout().getPointerSize(); } @@ -306,7 +324,7 @@ void DIEEntry::print(raw_ostream &O) { /// unsigned DIEBlock::ComputeSize(AsmPrinter *AP) { if (!Size) { - const SmallVector &AbbrevData = Abbrev.getData(); + const SmallVectorImpl &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) Size += Values[i]->SizeOf(AP, AbbrevData[i].getForm()); } @@ -325,7 +343,7 @@ void DIEBlock::EmitValue(AsmPrinter *Asm, unsigned Form) const { case dwarf::DW_FORM_block: Asm->EmitULEB128(Size); break; } - const SmallVector &AbbrevData = Abbrev.getData(); + const SmallVectorImpl &AbbrevData = Abbrev.getData(); for (unsigned i = 0, N = Values.size(); i < N; ++i) Values[i]->EmitValue(Asm, AbbrevData[i].getForm()); } diff --git a/lib/CodeGen/AsmPrinter/DIE.h b/lib/CodeGen/AsmPrinter/DIE.h index 28a96f3b2b65..c332aa2a7db6 100644 --- a/lib/CodeGen/AsmPrinter/DIE.h +++ b/lib/CodeGen/AsmPrinter/DIE.h @@ -8,7 +8,7 @@ //===----------------------------------------------------------------------===// // // Data structures for DWARF info entries. -// +// //===----------------------------------------------------------------------===// #ifndef CODEGEN_ASMPRINTER_DIE_H__ @@ -66,7 +66,7 @@ namespace llvm { /// Data - Raw data bytes for abbreviation. /// - SmallVector Data; + SmallVector Data; public: DIEAbbrev(uint16_t T, uint16_t C) : Tag(T), ChildrenFlag(C), Data() {} @@ -75,7 +75,7 @@ namespace llvm { uint16_t getTag() const { return Tag; } unsigned getNumber() const { return Number; } uint16_t getChildrenFlag() const { return ChildrenFlag; } - const SmallVector &getData() const { return Data; } + const SmallVectorImpl &getData() const { return Data; } void setTag(uint16_t T) { Tag = T; } void setChildrenFlag(uint16_t CF) { ChildrenFlag = CF; } void setNumber(unsigned N) { Number = N; } @@ -108,7 +108,7 @@ namespace llvm { //===--------------------------------------------------------------------===// /// DIE - A structured debug information entry. Has an abbreviation which - /// describes it's organization. + /// describes its organization. class DIEValue; class DIE { @@ -131,9 +131,9 @@ namespace llvm { DIE *Parent; - /// Attributes values. + /// Attribute values. /// - SmallVector Values; + SmallVector Values; // Private data for print() mutable unsigned IndentCount; @@ -150,12 +150,15 @@ namespace llvm { unsigned getOffset() const { return Offset; } unsigned getSize() const { return Size; } const std::vector &getChildren() const { return Children; } - const SmallVector &getValues() const { return Values; } + const SmallVectorImpl &getValues() const { return Values; } DIE *getParent() const { return Parent; } + /// Climb up the parent chain to get the compile unit DIE this DIE belongs + /// to. + DIE *getCompileUnit() const; void setTag(unsigned Tag) { Abbrev.setTag(Tag); } void setOffset(unsigned O) { Offset = O; } void setSize(unsigned S) { Size = S; } - + /// addValue - Add a value and attributes to a DIE. 
/// void addValue(unsigned Attribute, unsigned Form, DIEValue *Value) { @@ -232,9 +235,10 @@ namespace llvm { /// static unsigned BestForm(bool IsSigned, uint64_t Int) { if (IsSigned) { - if ((char)Int == (signed)Int) return dwarf::DW_FORM_data1; - if ((short)Int == (signed)Int) return dwarf::DW_FORM_data2; - if ((int)Int == (signed)Int) return dwarf::DW_FORM_data4; + const int64_t SignedInt = Int; + if ((char)Int == SignedInt) return dwarf::DW_FORM_data1; + if ((short)Int == SignedInt) return dwarf::DW_FORM_data2; + if ((int)Int == SignedInt) return dwarf::DW_FORM_data4; } else { if ((unsigned char)Int == Int) return dwarf::DW_FORM_data1; if ((unsigned short)Int == Int) return dwarf::DW_FORM_data2; diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp index 05e0f2fb63b3..f58ec9b4bf46 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #include "DwarfAccelTable.h" -#include "DwarfDebug.h" #include "DIE.h" -#include "llvm/ADT/Twine.h" +#include "DwarfDebug.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCStreamer.h" @@ -32,7 +32,7 @@ const char *DwarfAccelTable::Atom::AtomTypeString(enum AtomType AT) { case eAtomTypeTag: return "eAtomTypeTag"; case eAtomTypeNameFlags: return "eAtomTypeNameFlags"; case eAtomTypeTypeFlags: return "eAtomTypeTypeFlags"; - } + } llvm_unreachable("invalid AtomType!"); } @@ -155,7 +155,7 @@ void DwarfAccelTable::EmitHashes(AsmPrinter *Asm) { HE = Buckets[i].end(); HI != HE; ++HI) { Asm->OutStreamer.AddComment("Hash in Bucket " + Twine(i)); Asm->EmitInt32((*HI)->HashValue); - } + } } } @@ -173,7 +173,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { MCBinaryExpr::CreateSub(MCSymbolRefExpr::Create((*HI)->Sym, Context), MCSymbolRefExpr::Create(SecBegin, Context), Context); - Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t), 0); + Asm->OutStreamer.EmitValue(Sub, sizeof(uint32_t)); } } } @@ -181,7 +181,7 @@ void DwarfAccelTable::EmitOffsets(AsmPrinter *Asm, MCSymbol *SecBegin) { // Walk through the buckets and emit the full data for each element in // the bucket. For the string case emit the dies and the various offsets. // Terminate each HashData bucket with 0. -void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { +void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfUnits *D) { uint64_t PrevHash = UINT64_MAX; for (size_t i = 0, e = Buckets.size(); i < e; ++i) { for (HashList::const_iterator HI = Buckets[i].begin(), @@ -190,7 +190,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { Asm->OutStreamer.EmitLabel((*HI)->Sym); Asm->OutStreamer.AddComment((*HI)->Str); Asm->EmitSectionOffset(D->getStringPoolEntry((*HI)->Str), - D->getStringPool()); + D->getStringPoolSym()); Asm->OutStreamer.AddComment("Num DIEs"); Asm->EmitInt32((*HI)->Data.size()); for (ArrayRef::const_iterator @@ -215,7 +215,7 @@ void DwarfAccelTable::EmitData(AsmPrinter *Asm, DwarfDebug *D) { // Emit the entire data structure to the output file. void DwarfAccelTable::Emit(AsmPrinter *Asm, MCSymbol *SecBegin, - DwarfDebug *D) { + DwarfUnits *D) { // Emit the header. 
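BestForm above now compares against the full 64-bit signed value instead of a value truncated to int when picking the smallest data form. A standalone sketch of that selection logic, returning byte sizes rather than DW_FORM_* constants:

#include <cstdint>
#include <iostream>

// Pick the smallest fixed-size encoding that round-trips the value when
// interpreted as signed or unsigned.
static unsigned smallestFixedSize(bool IsSigned, uint64_t Int) {
  if (IsSigned) {
    const int64_t S = static_cast<int64_t>(Int);
    if (static_cast<int8_t>(S)  == S) return 1;
    if (static_cast<int16_t>(S) == S) return 2;
    if (static_cast<int32_t>(S) == S) return 4;
  } else {
    if (static_cast<uint8_t>(Int)  == Int) return 1;
    if (static_cast<uint16_t>(Int) == Int) return 2;
    if (static_cast<uint32_t>(Int) == Int) return 4;
  }
  return 8;
}

int main() {
  // -1 fits in one signed byte, but needs all eight bytes when unsigned.
  std::cout << smallestFixedSize(true, static_cast<uint64_t>(-1)) << "\n";  // 1
  std::cout << smallestFixedSize(false, static_cast<uint64_t>(-1)) << "\n"; // 8
}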
EmitHeader(Asm); @@ -258,7 +258,7 @@ void DwarfAccelTable::print(raw_ostream &O) { for (std::vector::const_iterator DI = Data.begin(), DE = Data.end(); DI != DE; ++DI) (*DI)->print(O); - + } #endif diff --git a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h index 92d1bbe4f7e8..9915bcaa9b69 100644 --- a/lib/CodeGen/AsmPrinter/DwarfAccelTable.h +++ b/lib/CodeGen/AsmPrinter/DwarfAccelTable.h @@ -14,18 +14,18 @@ #ifndef CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ #define CODEGEN_ASMPRINTER_DWARFACCELTABLE_H__ -#include "llvm/ADT/StringMap.h" +#include "DIE.h" #include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/StringMap.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Support/Dwarf.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" -#include "DIE.h" -#include #include +#include // The dwarf accelerator tables are an indirect hash table optimized // for null lookup rather than access to known data. They are output into @@ -51,7 +51,7 @@ // section contains all of the 32-bit hash values in contiguous memory, and // the offsets contain the offset into the data area for the particular // hash. -// +// // For a lookup example, we could hash a function name and take it modulo the // number of buckets giving us our bucket. From there we take the bucket value // as an index into the hashes table and look at each successive hash as long @@ -63,8 +63,8 @@ namespace llvm { class AsmPrinter; class DIE; -class DwarfDebug; - +class DwarfUnits; + class DwarfAccelTable { enum HashFunctionType { @@ -81,7 +81,7 @@ class DwarfAccelTable { // Helper function to compute the number of buckets needed based on // the number of unique hashes. void ComputeBucketCount (void); - + struct TableHeader { uint32_t magic; // 'HASH' magic value to allow endian detection uint16_t version; // Version number. @@ -94,7 +94,7 @@ class DwarfAccelTable { // Also written to disk is the implementation specific header data. static const uint32_t MagicHash = 0x48415348; - + TableHeader (uint32_t data_len) : magic (MagicHash), version (1), hash_function (eHashFunctionDJB), bucket_count (0), hashes_count (0), header_data_len (data_len) @@ -123,7 +123,7 @@ public: // // uint32_t die_offset_base // uint32_t atom_count - // atom_count Atoms + // atom_count Atoms enum AtomType { eAtomTypeNULL = 0u, eAtomTypeDIEOffset = 1u, // DIE offset, check form for encoding @@ -138,12 +138,12 @@ public: enum TypeFlags { eTypeFlagClassMask = 0x0000000fu, - + // Always set for C++, only set for ObjC if this is the // @implementation for a class. eTypeFlagClassIsImplementation = ( 1u << 1 ) - }; - + }; + // Make these public so that they can be used as a general interface to // the class. struct Atom { @@ -245,7 +245,7 @@ private: void EmitBuckets(AsmPrinter *); void EmitHashes(AsmPrinter *); void EmitOffsets(AsmPrinter *, MCSymbol *); - void EmitData(AsmPrinter *, DwarfDebug *D); + void EmitData(AsmPrinter *, DwarfUnits *D); // Allocator for HashData and HashDataContents. 
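The accelerator-table header comment above describes lookup as: hash the name, take it modulo the bucket count to pick a bucket, then scan that bucket's hashes for a match. A toy sketch of that scheme using the DJB hash named by eHashFunctionDJB; the on-disk layout and offset/data sections are omitted:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

static uint32_t djbHash(const std::string &S) {
  uint32_t H = 5381;
  for (unsigned char C : S)
    H = H * 33 + C;
  return H;
}

int main() {
  const std::vector<std::string> Names = {"main", "printf", "foo", "bar"};
  const unsigned NumBuckets = 2;
  // Each bucket holds (hash, name) pairs for names that landed in it.
  std::vector<std::vector<std::pair<uint32_t, std::string>>> Buckets(NumBuckets);
  for (const auto &N : Names) {
    uint32_t H = djbHash(N);
    Buckets[H % NumBuckets].push_back({H, N});
  }

  // Lookup: one hash, one modulo, then a short scan of a single bucket.
  uint32_t H = djbHash("foo");
  for (const auto &Entry : Buckets[H % NumBuckets])
    if (Entry.first == H && Entry.second == "foo")
      std::cout << "found foo in bucket " << H % NumBuckets << "\n";
}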
BumpPtrAllocator Allocator; @@ -265,14 +265,14 @@ private: typedef std::vector BucketList; BucketList Buckets; HashList Hashes; - + // Public Implementation public: DwarfAccelTable(ArrayRef); ~DwarfAccelTable(); void AddName(StringRef, DIE*, char = 0); void FinalizeTable(AsmPrinter *, const char *); - void Emit(AsmPrinter *, MCSymbol *, DwarfDebug *); + void Emit(AsmPrinter *, MCSymbol *, DwarfUnits *); #ifndef NDEBUG void print(raw_ostream &O); void dump() { print(dbgs()); } diff --git a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp index 4fdd5ca25221..fec5cedc684b 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCFIException.cpp @@ -12,31 +12,31 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/MC/MachineLocation.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" using namespace llvm; DwarfCFIException::DwarfCFIException(AsmPrinter *A) @@ -122,8 +122,9 @@ void DwarfCFIException::BeginFunction(const MachineFunction *MF) { const MCSymbol *Sym = TLOF.getCFIPersonalitySymbol(Per, Asm->Mang, MMI); Asm->OutStreamer.EmitCFIPersonality(Sym, PerEncoding); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("eh_func_begin", - Asm->getFunctionNumber())); + Asm->OutStreamer.EmitDebugLabel + (Asm->GetTempSymbol("eh_func_begin", + Asm->getFunctionNumber())); // Provide LSDA information. 
if (!shouldEmitLSDA) diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index 2b07dda31ffe..f9b6f9472141 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -13,28 +13,29 @@ #define DEBUG_TYPE "dwarfdebug" -#include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" +#include "DwarfAccelTable.h" #include "DwarfDebug.h" -#include "llvm/Constants.h" +#include "llvm/ADT/APFloat.h" #include "llvm/DIBuilder.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/APFloat.h" -#include "llvm/Support/ErrorHandling.h" using namespace llvm; /// CompileUnit - Compile unit constructor. -CompileUnit::CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, - DwarfDebug *DW) - : ID(I), Language(L), CUDie(D), Asm(A), DD(DW), IndexTyDie(0) { +CompileUnit::CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, + DwarfDebug *DW, DwarfUnits *DWU) + : UniqueID(UID), Language(L), CUDie(D), Asm(A), DD(DW), DU(DWU), + IndexTyDie(0), DebugInfoOffset(0) { DIEIntegerOne = new (DIEValueAllocator) DIEInteger(1); } @@ -51,6 +52,50 @@ DIEEntry *CompileUnit::createDIEEntry(DIE *Entry) { return Value; } +/// getDefaultLowerBound - Return the default lower bound for an array. If the +/// DWARF version doesn't handle the language, return -1. +int64_t CompileUnit::getDefaultLowerBound() const { + switch (Language) { + default: + break; + + case dwarf::DW_LANG_C89: + case dwarf::DW_LANG_C99: + case dwarf::DW_LANG_C: + case dwarf::DW_LANG_C_plus_plus: + case dwarf::DW_LANG_ObjC: + case dwarf::DW_LANG_ObjC_plus_plus: + return 0; + + case dwarf::DW_LANG_Fortran77: + case dwarf::DW_LANG_Fortran90: + case dwarf::DW_LANG_Fortran95: + return 1; + + // The languages below have valid values only if the DWARF version >= 4. + case dwarf::DW_LANG_Java: + case dwarf::DW_LANG_Python: + case dwarf::DW_LANG_UPC: + case dwarf::DW_LANG_D: + if (dwarf::DWARF_VERSION >= 4) + return 0; + break; + + case dwarf::DW_LANG_Ada83: + case dwarf::DW_LANG_Ada95: + case dwarf::DW_LANG_Cobol74: + case dwarf::DW_LANG_Cobol85: + case dwarf::DW_LANG_Modula2: + case dwarf::DW_LANG_Pascal83: + case dwarf::DW_LANG_PLI: + if (dwarf::DWARF_VERSION >= 4) + return 1; + break; + } + + return -1; +} + /// addFlag - Add a flag that is true. void CompileUnit::addFlag(DIE *Die, unsigned Attribute) { if (!DD->useDarwinGDBCompat()) @@ -81,14 +126,37 @@ void CompileUnit::addSInt(DIE *Die, unsigned Attribute, /// addString - Add a string attribute data and value. We always emit a /// reference to the string pool instead of immediate strings so that DIEs have -/// more predictable sizes. +/// more predictable sizes. In the case of split dwarf we emit an index +/// into another table which gets us the static offset into the string +/// table. 
void CompileUnit::addString(DIE *Die, unsigned Attribute, StringRef String) { - MCSymbol *Symb = DD->getStringPoolEntry(String); + if (!DD->useSplitDwarf()) { + MCSymbol *Symb = DU->getStringPoolEntry(String); + DIEValue *Value; + if (Asm->needsRelocationsForDwarfStringPool()) + Value = new (DIEValueAllocator) DIELabel(Symb); + else { + MCSymbol *StringPool = DU->getStringPoolSym(); + Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); + } + Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); + } else { + unsigned idx = DU->getStringPoolIndex(String); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_str_index, Value); + } +} + +/// addLocalString - Add a string attribute data and value. This is guaranteed +/// to be in the local string pool instead of indirected. +void CompileUnit::addLocalString(DIE *Die, unsigned Attribute, + StringRef String) { + MCSymbol *Symb = DU->getStringPoolEntry(String); DIEValue *Value; if (Asm->needsRelocationsForDwarfStringPool()) Value = new (DIEValueAllocator) DIELabel(Symb); else { - MCSymbol *StringPool = DD->getStringPool(); + MCSymbol *StringPool = DU->getStringPoolSym(); Value = new (DIEValueAllocator) DIEDelta(Symb, StringPool); } Die->addValue(Attribute, dwarf::DW_FORM_strp, Value); @@ -102,6 +170,42 @@ void CompileUnit::addLabel(DIE *Die, unsigned Attribute, unsigned Form, Die->addValue(Attribute, Form, Value); } +/// addLabelAddress - Add a dwarf label attribute data and value using +/// DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void CompileUnit::addLabelAddress(DIE *Die, unsigned Attribute, + MCSymbol *Label) { + if (!DD->useSplitDwarf()) { + if (Label != NULL) { + DIEValue *Value = new (DIEValueAllocator) DIELabel(Label); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } else { + DIEValue *Value = new (DIEValueAllocator) DIEInteger(0); + Die->addValue(Attribute, dwarf::DW_FORM_addr, Value); + } + } else { + unsigned idx = DU->getAddrPoolIndex(Label); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + Die->addValue(Attribute, dwarf::DW_FORM_GNU_addr_index, Value); + } +} + +/// addOpAddress - Add a dwarf op address data and value using the +/// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. +/// +void CompileUnit::addOpAddress(DIE *Die, MCSymbol *Sym) { + + if (!DD->useSplitDwarf()) { + addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); + addLabel(Die, 0, dwarf::DW_FORM_udata, Sym); + } else { + unsigned idx = DU->getAddrPoolIndex(Sym); + DIEValue *Value = new (DIEValueAllocator) DIEInteger(idx); + addUInt(Die, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_GNU_addr_index); + Die->addValue(0, dwarf::DW_FORM_GNU_addr_index, Value); + } +} + /// addDelta - Add a label delta attribute data and value. /// void CompileUnit::addDelta(DIE *Die, unsigned Attribute, unsigned Form, @@ -132,12 +236,13 @@ void CompileUnit::addSourceLine(DIE *Die, DIVariable V) { // Verify variable. 
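addString above either references the string pool directly (the DW_FORM_strp path, via a label or label delta) or, under split DWARF, emits a DW_FORM_GNU_str_index index obtained from the pool. A toy deduplicating pool illustrating the two lookups; it is only a stand-in for the DwarfUnits calls, with byte offsets playing the role of the label/delta:

#include <cstdint>
#include <iostream>
#include <map>
#include <string>
#include <utility>

class StringPool {
  // name -> (index for the str_index path, byte offset for the strp path)
  std::map<std::string, std::pair<unsigned, uint64_t>> Entries;
  uint64_t NextOffset = 0;

  std::pair<unsigned, uint64_t> get(const std::string &S) {
    auto It = Entries.find(S);
    if (It == Entries.end()) {
      std::pair<unsigned, uint64_t> E{static_cast<unsigned>(Entries.size()),
                                      NextOffset};
      NextOffset += S.size() + 1;   // strings are NUL-terminated in the pool
      It = Entries.emplace(S, E).first;
    }
    return It->second;
  }

public:
  unsigned getIndex(const std::string &S) { return get(S).first; }
  uint64_t getOffset(const std::string &S) { return get(S).second; }
};

int main() {
  StringPool SP;
  std::cout << SP.getIndex("main") << " " << SP.getOffset("main") << "\n"; // 0 0
  std::cout << SP.getIndex("foo")  << " " << SP.getOffset("foo")  << "\n"; // 1 5
  std::cout << SP.getIndex("main") << "\n";                                // 0
}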
if (!V.Verify()) return; - + unsigned Line = V.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(V.getContext().getFilename(), - V.getContext().getDirectory()); + unsigned FileID = DD->getOrCreateSourceID(V.getContext().getFilename(), + V.getContext().getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -153,7 +258,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIGlobalVariable G) { unsigned Line = G.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(G.getFilename(), G.getDirectory()); + unsigned FileID = DD->getOrCreateSourceID(G.getFilename(), G.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -171,8 +277,8 @@ void CompileUnit::addSourceLine(DIE *Die, DISubprogram SP) { if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(SP.getFilename(), - SP.getDirectory()); + unsigned FileID = DD->getOrCreateSourceID(SP.getFilename(), + SP.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -188,8 +294,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIType Ty) { unsigned Line = Ty.getLineNumber(); if (Line == 0) return; - unsigned FileID = DD->GetOrCreateSourceID(Ty.getFilename(), - Ty.getDirectory()); + unsigned FileID = DD->getOrCreateSourceID(Ty.getFilename(), + Ty.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -206,8 +312,8 @@ void CompileUnit::addSourceLine(DIE *Die, DIObjCProperty Ty) { if (Line == 0) return; DIFile File = Ty.getFile(); - unsigned FileID = DD->GetOrCreateSourceID(File.getFilename(), - File.getDirectory()); + unsigned FileID = DD->getOrCreateSourceID(File.getFilename(), + File.getDirectory(), getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); @@ -225,15 +331,16 @@ void CompileUnit::addSourceLine(DIE *Die, DINameSpace NS) { return; StringRef FN = NS.getFilename(); - unsigned FileID = DD->GetOrCreateSourceID(FN, NS.getDirectory()); + unsigned FileID = DD->getOrCreateSourceID(FN, NS.getDirectory(), + getUniqueID()); assert(FileID && "Invalid file id"); addUInt(Die, dwarf::DW_AT_decl_file, 0, FileID); addUInt(Die, dwarf::DW_AT_decl_line, 0, Line); } -/// addVariableAddress - Add DW_AT_location attribute for a +/// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. -void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, +void CompileUnit::addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location) { if (DV->variableHasComplexAddress()) addComplexAddress(DV, Die, dwarf::DW_AT_location, Location); @@ -492,7 +599,7 @@ bool CompileUnit::addConstantValue(DIE *Die, const MachineOperand &MO, case 64: Form = dwarf::DW_FORM_data8; break; default: break; } - SignedConstant ? addSInt(Block, 0, Form, MO.getImm()) + SignedConstant ? 
addSInt(Block, 0, Form, MO.getImm()) : addUInt(Block, 0, Form, MO.getImm()); addBlock(Die, dwarf::DW_AT_const_value, 0, Block); @@ -524,10 +631,21 @@ bool CompileUnit::addConstantFPValue(DIE *Die, const MachineOperand &MO) { return true; } +/// addConstantFPValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantFPValue(DIE *Die, const ConstantFP *CFP) { + return addConstantValue(Die, CFP->getValueAPF().bitcastToAPInt(), false); +} + /// addConstantValue - Add constant value entry in variable DIE. bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned) { - unsigned CIBitWidth = CI->getBitWidth(); + return addConstantValue(Die, CI->getValue(), Unsigned); +} + +// addConstantValue - Add constant value entry in variable DIE. +bool CompileUnit::addConstantValue(DIE *Die, const APInt &Val, + bool Unsigned) { + unsigned CIBitWidth = Val.getBitWidth(); if (CIBitWidth <= 64) { unsigned form = 0; switch (CIBitWidth) { @@ -535,20 +653,19 @@ bool CompileUnit::addConstantValue(DIE *Die, const ConstantInt *CI, case 16: form = dwarf::DW_FORM_data2; break; case 32: form = dwarf::DW_FORM_data4; break; case 64: form = dwarf::DW_FORM_data8; break; - default: + default: form = Unsigned ? dwarf::DW_FORM_udata : dwarf::DW_FORM_sdata; } if (Unsigned) - addUInt(Die, dwarf::DW_AT_const_value, form, CI->getZExtValue()); + addUInt(Die, dwarf::DW_AT_const_value, form, Val.getZExtValue()); else - addSInt(Die, dwarf::DW_AT_const_value, form, CI->getSExtValue()); + addSInt(Die, dwarf::DW_AT_const_value, form, Val.getSExtValue()); return true; } DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); // Get the raw data form of the large APInt. - const APInt Val = CI->getValue(); const uint64_t *Ptr64 = Val.getRawData(); int NumBytes = Val.getBitWidth() / 8; // 8 bits per byte. @@ -582,18 +699,21 @@ void CompileUnit::addTemplateParams(DIE &Buffer, DIArray TParams) { } } +/// getOrCreateContextDIE - Get context owner's DIE. +DIE *CompileUnit::getOrCreateContextDIE(DIDescriptor Context) { + if (Context.isType()) + return getOrCreateTypeDIE(DIType(Context)); + else if (Context.isNameSpace()) + return getOrCreateNameSpace(DINameSpace(Context)); + else if (Context.isSubprogram()) + return getOrCreateSubprogramDIE(DISubprogram(Context)); + else + return getDIE(Context); +} + /// addToContextOwner - Add Die into the list of its context owner's children. 
void CompileUnit::addToContextOwner(DIE *Die, DIDescriptor Context) { - if (Context.isType()) { - DIE *ContextDIE = getOrCreateTypeDIE(DIType(Context)); - ContextDIE->addChild(Die); - } else if (Context.isNameSpace()) { - DIE *ContextDIE = getOrCreateNameSpace(DINameSpace(Context)); - ContextDIE->addChild(Die); - } else if (Context.isSubprogram()) { - DIE *ContextDIE = getOrCreateSubprogramDIE(DISubprogram(Context)); - ContextDIE->addChild(Die); - } else if (DIE *ContextDIE = getDIE(Context)) + if (DIE *ContextDIE = getOrCreateContextDIE(Context)) ContextDIE->addChild(Die); else addDie(Die); @@ -635,7 +755,7 @@ DIE *CompileUnit::getOrCreateTypeDIE(const MDNode *TyNode) { DwarfAccelTable::eTypeFlagClassIsImplementation : 0; addAccelType(Ty.getName(), std::make_pair(TyDIE, Flags)); } - + addToContextOwner(TyDIE, Ty.getContext()); return TyDIE; } @@ -670,8 +790,8 @@ void CompileUnit::addType(DIE *Entity, DIType Ty, unsigned Attribute) { /// void CompileUnit::addGlobalType(DIType Ty) { DIDescriptor Context = Ty.getContext(); - if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() - && (!Context || Context.isCompileUnit() || Context.isFile() + if (Ty.isCompositeType() && !Ty.getName().empty() && !Ty.isForwardDecl() + && (!Context || Context.isCompileUnit() || Context.isFile() || Context.isNameSpace())) if (DIEEntry *Entry = getDIEEntry(Ty)) GlobalTypes[Ty.getName()] = Entry->getEntry(); @@ -739,6 +859,9 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DIDerivedType DTy) { if (Size && Tag != dwarf::DW_TAG_pointer_type) addUInt(&Buffer, dwarf::DW_AT_byte_size, 0, Size); + if (Tag == dwarf::DW_TAG_ptr_to_member_type) + addDIEEntry(&Buffer, dwarf::DW_AT_containing_type, dwarf::DW_FORM_ref4, + getOrCreateTypeDIE(DTy.getClassType())); // Add source line info if available and TyDesc is not a forward declaration. 
if (!DTy.isForwardDecl()) addSourceLine(&Buffer, DTy); @@ -754,7 +877,6 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { Buffer.setTag(Tag); switch (Tag) { - case dwarf::DW_TAG_vector_type: case dwarf::DW_TAG_array_type: constructArrayTypeDIE(Buffer, &CTy); break; @@ -794,6 +916,8 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } else { DIE *Arg = new DIE(dwarf::DW_TAG_formal_parameter); addType(Arg, DIType(Ty)); + if (DIType(Ty).isArtificial()) + addFlag(Arg, dwarf::DW_AT_artificial); Buffer.addChild(Arg); } } @@ -830,27 +954,20 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { else if (SP.isPrivate()) addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); - else + else addUInt(ElemDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (SP.isExplicit()) addFlag(ElemDie, dwarf::DW_AT_explicit); - } - else if (Element.isVariable()) { - DIVariable DV(Element); - ElemDie = new DIE(dwarf::DW_TAG_variable); - addString(ElemDie, dwarf::DW_AT_name, DV.getName()); - addType(ElemDie, DV.getType()); - addFlag(ElemDie, dwarf::DW_AT_declaration); - addFlag(ElemDie, dwarf::DW_AT_external); - addSourceLine(ElemDie, DV); } else if (Element.isDerivedType()) { DIDerivedType DDTy(Element); if (DDTy.getTag() == dwarf::DW_TAG_friend) { ElemDie = new DIE(dwarf::DW_TAG_friend); addType(ElemDie, DDTy.getTypeDerivedFrom(), dwarf::DW_AT_friend); - } else - ElemDie = createMemberDIE(DIDerivedType(Element)); + } else if (DDTy.isStaticMember()) + ElemDie = createStaticMemberDIE(DDTy); + else + ElemDie = createMemberDIE(DDTy); } else if (Element.isObjCProperty()) { DIObjCProperty Property(Element); ElemDie = new DIE(Property.getTag()); @@ -878,7 +995,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { if (Property.isNonAtomicObjCProperty()) PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; if (PropertyAttributes) - addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, + addUInt(ElemDie, dwarf::DW_AT_APPLE_property_attribute, 0, PropertyAttributes); DIEEntry *Entry = getDIEEntry(Element); @@ -951,7 +1068,7 @@ void CompileUnit::constructTypeDIE(DIE &Buffer, DICompositeType CTy) { } } -/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE +/// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE /// for the given DITemplateTypeParameter. DIE * CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { @@ -965,7 +1082,7 @@ CompileUnit::getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP) { return ParamDIE; } -/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE +/// getOrCreateTemplateValueParameterDIE - Find existing DIE or create new DIE /// for the given DITemplateValueParameter. DIE * CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ @@ -977,7 +1094,7 @@ CompileUnit::getOrCreateTemplateValueParameterDIE(DITemplateValueParameter TPV){ addType(ParamDIE, TPV.getType()); if (!TPV.getName().empty()) addString(ParamDIE, dwarf::DW_AT_name, TPV.getName()); - addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, + addUInt(ParamDIE, dwarf::DW_AT_const_value, dwarf::DW_FORM_udata, TPV.getValue()); return ParamDIE; } @@ -1095,7 +1212,7 @@ DIE *CompileUnit::getOrCreateSubprogramDIE(DISubprogram SP) { if (!SP.isDefinition()) { addFlag(SPDie, dwarf::DW_AT_declaration); - + // Add arguments. 
Do not add arguments for subprogram definition. They will // be handled while processing variables. DICompositeType SPTy = SP.getType(); @@ -1166,39 +1283,56 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { if (!GV.Verify()) return; - DIE *VariableDIE = new DIE(GV.getTag()); - // Add to map. - insertDIE(N, VariableDIE); - - // Add name. - addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); - StringRef LinkageName = GV.getLinkageName(); - bool isGlobalVariable = GV.getGlobal() != NULL; - if (!LinkageName.empty() && isGlobalVariable) - addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, - getRealLinkageName(LinkageName)); - // Add type. + DIDescriptor GVContext = GV.getContext(); DIType GTy = GV.getType(); - addType(VariableDIE, GTy); - // Add scoping info. - if (!GV.isLocalToUnit()) - addFlag(VariableDIE, dwarf::DW_AT_external); + // If this is a static data member definition, some attributes belong + // to the declaration DIE. + DIE *VariableDIE = NULL; + bool IsStaticMember = false; + DIDerivedType SDMDecl = GV.getStaticDataMemberDeclaration(); + if (SDMDecl.Verify()) { + assert(SDMDecl.isStaticMember() && "Expected static member decl"); + // We need the declaration DIE that is in the static member's class. + // But that class might not exist in the DWARF yet. + // Creating the class will create the static member decl DIE. + getOrCreateContextDIE(SDMDecl.getContext()); + VariableDIE = getDIE(SDMDecl); + assert(VariableDIE && "Static member decl has no context?"); + IsStaticMember = true; + } + + // If this is not a static data member definition, create the variable + // DIE and add the initial set of attributes to it. + if (!VariableDIE) { + VariableDIE = new DIE(GV.getTag()); + // Add to map. + insertDIE(N, VariableDIE); + + // Add name and type. + addString(VariableDIE, dwarf::DW_AT_name, GV.getDisplayName()); + addType(VariableDIE, GTy); + + // Add scoping info. + if (!GV.isLocalToUnit()) { + addFlag(VariableDIE, dwarf::DW_AT_external); + addGlobalName(GV.getName(), VariableDIE); + } + + // Add line number info. + addSourceLine(VariableDIE, GV); + // Add to context owner. + addToContextOwner(VariableDIE, GVContext); + } - // Add line number info. - addSourceLine(VariableDIE, GV); - // Add to context owner. - DIDescriptor GVContext = GV.getContext(); - addToContextOwner(VariableDIE, GVContext); // Add location. bool addToAccelTable = false; DIE *VariableSpecDIE = NULL; + bool isGlobalVariable = GV.getGlobal() != NULL; if (isGlobalVariable) { addToAccelTable = true; DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(GV.getGlobal())); + addOpAddress(Block, Asm->Mang->getSymbol(GV.getGlobal())); // Do not create specification DIE if context is either compile unit // or a subprogram. if (GVContext && GV.isDefinition() && !GVContext.isCompileUnit() && @@ -1208,25 +1342,44 @@ void CompileUnit::createGlobalVariableDIE(const MDNode *N) { addDIEEntry(VariableSpecDIE, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, VariableDIE); addBlock(VariableSpecDIE, dwarf::DW_AT_location, 0, Block); - addFlag(VariableDIE, dwarf::DW_AT_declaration); + // A static member's declaration is already flagged as such. 
+ if (!SDMDecl.Verify()) + addFlag(VariableDIE, dwarf::DW_AT_declaration); addDie(VariableSpecDIE); } else { addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); } - } else if (const ConstantInt *CI = - dyn_cast_or_null(GV.getConstant())) - addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); - else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { + // Add linkage name. + StringRef LinkageName = GV.getLinkageName(); + if (!LinkageName.empty()) { + // From DWARF4: DIEs to which DW_AT_linkage_name may apply include: + // TAG_common_block, TAG_constant, TAG_entry_point, TAG_subprogram and + // TAG_variable. + addString(IsStaticMember && VariableSpecDIE ? + VariableSpecDIE : VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + // In compatibility mode with older gdbs we put the linkage name on both + // the TAG_variable DIE and on the TAG_member DIE. + if (IsStaticMember && VariableSpecDIE && DD->useDarwinGDBCompat()) + addString(VariableDIE, dwarf::DW_AT_MIPS_linkage_name, + getRealLinkageName(LinkageName)); + } + } else if (const ConstantInt *CI = + dyn_cast_or_null(GV.getConstant())) { + // AT_const_value was added when the static member was created. To avoid + // emitting AT_const_value multiple times, we only add AT_const_value when + // it is not a static member. + if (!IsStaticMember) + addConstantValue(VariableDIE, CI, GTy.isUnsignedDIType()); + } else if (const ConstantExpr *CE = getMergedGlobalExpr(N->getOperand(11))) { addToAccelTable = true; // GV is a merged global. DIEBlock *Block = new (DIEValueAllocator) DIEBlock(); Value *Ptr = CE->getOperand(0); - addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_addr); - addLabel(Block, 0, dwarf::DW_FORM_udata, - Asm->Mang->getSymbol(cast(Ptr))); + addOpAddress(Block, Asm->Mang->getSymbol(cast(Ptr))); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_constu); SmallVector Idx(CE->op_begin()+1, CE->op_end()); - addUInt(Block, 0, dwarf::DW_FORM_udata, + addUInt(Block, 0, dwarf::DW_FORM_udata, Asm->getDataLayout().getIndexedOffset(Ptr->getType(), Idx)); addUInt(Block, 0, dwarf::DW_FORM_data1, dwarf::DW_OP_plus); addBlock(VariableDIE, dwarf::DW_AT_location, 0, Block); @@ -1250,22 +1403,25 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy) { DIE *DW_Subrange = new DIE(dwarf::DW_TAG_subrange_type); addDIEEntry(DW_Subrange, dwarf::DW_AT_type, dwarf::DW_FORM_ref4, IndexTy); - uint64_t L = SR.getLo(); - uint64_t H = SR.getHi(); - // The L value defines the lower bounds which is typically zero for C/C++. The - // H value is the upper bounds. Values are 64 bit. H - L + 1 is the size - // of the array. If L > H then do not emit DW_AT_lower_bound and - // DW_AT_upper_bound attributes. If L is zero and H is also zero then the - // array has one element and in such case do not emit lower bound. + // The LowerBound value defines the lower bounds which is typically zero for + // C/C++. The Count value is the number of elements. Values are 64 bit. If + // Count == -1 then the array is unbounded and we do not emit + // DW_AT_lower_bound and DW_AT_upper_bound attributes. If LowerBound == 0 and + // Count == 0, then the array has zero elements in which case we do not emit + // an upper bound. 
+ int64_t LowerBound = SR.getLo(); + int64_t DefaultLowerBound = getDefaultLowerBound(); + int64_t Count = SR.getCount(); + + if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound) + addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, LowerBound); + + if (Count != -1 && Count != 0) + // FIXME: An unbounded array should reference the expression that defines + // the array. + addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, LowerBound + Count - 1); - if (L > H) { - Buffer.addChild(DW_Subrange); - return; - } - if (L) - addUInt(DW_Subrange, dwarf::DW_AT_lower_bound, 0, L); - addUInt(DW_Subrange, dwarf::DW_AT_upper_bound, 0, H); Buffer.addChild(DW_Subrange); } @@ -1273,7 +1429,7 @@ void CompileUnit::constructSubrangeDIE(DIE &Buffer, DISubrange SR, void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy) { Buffer.setTag(dwarf::DW_TAG_array_type); - if (CTy->getTag() == dwarf::DW_TAG_vector_type) + if (CTy->isVector()) addFlag(&Buffer, dwarf::DW_AT_GNU_vector); // Emit derived type. @@ -1281,10 +1437,13 @@ void CompileUnit::constructArrayTypeDIE(DIE &Buffer, DIArray Elements = CTy->getTypeArray(); // Get an anonymous type for index type. + // FIXME: This type should be passed down from the front end + // as different languages may have different sizes for indexes. DIE *IdxTy = getIndexTyDie(); if (!IdxTy) { // Construct an anonymous type for index type. IdxTy = new DIE(dwarf::DW_TAG_base_type); + addString(IdxTy, dwarf::DW_AT_name, "int"); addUInt(IdxTy, dwarf::DW_AT_byte_size, 0, sizeof(int32_t)); addUInt(IdxTy, dwarf::DW_AT_encoding, dwarf::DW_FORM_data1, dwarf::DW_ATE_signed); @@ -1327,8 +1486,6 @@ void CompileUnit::constructContainingTypeDIEs() { /// constructVariableDIE - Construct a DIE for the given DbgVariable. DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { StringRef Name = DV->getName(); - if (Name.empty()) - return NULL; // Translate tag to proper Dwarf tag. 
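The subrange hunk above replaces the old (Lo, Hi) pair with (LowerBound, Count) plus a language-dependent default lower bound. The rule is compact enough to restate as a self-contained sketch; it uses no LLVM types, and the struct and function names are made up for the example:

    #include <cstdint>
    #include <optional>

    struct SubrangeAttrs {
      std::optional<int64_t> LowerBound; // DW_AT_lower_bound, if emitted
      std::optional<int64_t> UpperBound; // DW_AT_upper_bound, if emitted
    };

    // DefaultLowerBound is -1 when the DWARF/language pair has no default;
    // otherwise it is the language default (0 for C and C++).
    SubrangeAttrs selectSubrangeAttrs(int64_t LowerBound, int64_t Count,
                                      int64_t DefaultLowerBound) {
      SubrangeAttrs A;
      // Only emit a lower bound the language does not already imply.
      if (DefaultLowerBound == -1 || LowerBound != DefaultLowerBound)
        A.LowerBound = LowerBound;
      // Count == -1 marks an unbounded array and Count == 0 an empty one;
      // neither gets an upper bound.
      if (Count != -1 && Count != 0)
        A.UpperBound = LowerBound + Count - 1;
      return A;
    }

For a plain C array of eight elements this yields no lower bound (zero is the default) and DW_AT_upper_bound = 7, matching the code above.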
unsigned Tag = DV->getTag(); @@ -1376,20 +1533,20 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { TRI->getFrameRegister(*Asm->MF) == RegOp.getReg()) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = - TFI->getFrameIndexReference(*Asm->MF, - DVInsn->getOperand(1).getImm(), + int Offset = + TFI->getFrameIndexReference(*Asm->MF, + DVInsn->getOperand(1).getImm(), FrameReg); MachineLocation Location(FrameReg, Offset); addVariableAddress(DV, VariableDie, Location); - + } else if (RegOp.getReg()) - addVariableAddress(DV, VariableDie, + addVariableAddress(DV, VariableDie, MachineLocation(RegOp.getReg())); updated = true; } else if (DVInsn->getOperand(0).isImm()) - updated = + updated = addConstantValue(VariableDie, DVInsn->getOperand(0), DV->getType()); else if (DVInsn->getOperand(0).isFPImm()) @@ -1397,11 +1554,11 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { addConstantFPValue(VariableDie, DVInsn->getOperand(0)); else if (DVInsn->getOperand(0).isCImm()) updated = - addConstantValue(VariableDie, + addConstantValue(VariableDie, DVInsn->getOperand(0).getCImm(), DV->getType().isUnsignedDIType()); } else { - addVariableAddress(DV, VariableDie, + addVariableAddress(DV, VariableDie, Asm->getDebugValueLocation(DVInsn)); updated = true; } @@ -1419,7 +1576,7 @@ DIE *CompileUnit::constructVariableDIE(DbgVariable *DV, bool isScopeAbstract) { if (FI != ~0) { unsigned FrameReg = 0; const TargetFrameLowering *TFI = Asm->TM.getFrameLowering(); - int Offset = + int Offset = TFI->getFrameIndexReference(*Asm->MF, FI, FrameReg); MachineLocation Location(FrameReg, Offset); addVariableAddress(DV, VariableDie, Location); @@ -1499,7 +1656,7 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_private); // Otherwise C++ member and base classes are considered public. - else + else addUInt(MemberDie, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, dwarf::DW_ACCESS_public); if (DT.isVirtual()) @@ -1509,35 +1666,46 @@ DIE *CompileUnit::createMemberDIE(DIDerivedType DT) { // Objective-C properties. if (MDNode *PNode = DT.getObjCProperty()) if (DIEEntry *PropertyDie = getDIEEntry(PNode)) - MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, + MemberDie->addValue(dwarf::DW_AT_APPLE_property, dwarf::DW_FORM_ref4, PropertyDie); - // This is only for backward compatibility. 
- StringRef PropertyName = DT.getObjCPropertyName(); - if (!PropertyName.empty()) { - addString(MemberDie, dwarf::DW_AT_APPLE_property_name, PropertyName); - StringRef GetterName = DT.getObjCPropertyGetterName(); - if (!GetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_getter, GetterName); - StringRef SetterName = DT.getObjCPropertySetterName(); - if (!SetterName.empty()) - addString(MemberDie, dwarf::DW_AT_APPLE_property_setter, SetterName); - unsigned PropertyAttributes = 0; - if (DT.isReadOnlyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readonly; - if (DT.isReadWriteObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_readwrite; - if (DT.isAssignObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_assign; - if (DT.isRetainObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_retain; - if (DT.isCopyObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_copy; - if (DT.isNonAtomicObjCProperty()) - PropertyAttributes |= dwarf::DW_APPLE_PROPERTY_nonatomic; - if (PropertyAttributes) - addUInt(MemberDie, dwarf::DW_AT_APPLE_property_attribute, 0, - PropertyAttributes); - } + if (DT.isArtificial()) + addFlag(MemberDie, dwarf::DW_AT_artificial); + return MemberDie; } + +/// createStaticMemberDIE - Create new DIE for C++ static member. +DIE *CompileUnit::createStaticMemberDIE(const DIDerivedType DT) { + if (!DT.Verify()) + return NULL; + + DIE *StaticMemberDIE = new DIE(DT.getTag()); + DIType Ty = DT.getTypeDerivedFrom(); + + addString(StaticMemberDIE, dwarf::DW_AT_name, DT.getName()); + addType(StaticMemberDIE, Ty); + addSourceLine(StaticMemberDIE, DT); + addFlag(StaticMemberDIE, dwarf::DW_AT_external); + addFlag(StaticMemberDIE, dwarf::DW_AT_declaration); + + // FIXME: We could omit private if the parent is a class_type, and + // public if the parent is something else. + if (DT.isProtected()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_protected); + else if (DT.isPrivate()) + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_private); + else + addUInt(StaticMemberDIE, dwarf::DW_AT_accessibility, dwarf::DW_FORM_data1, + dwarf::DW_ACCESS_public); + + if (const ConstantInt *CI = dyn_cast_or_null(DT.getConstant())) + addConstantValue(StaticMemberDIE, CI, Ty.isUnsignedDIType()); + if (const ConstantFP *CFP = dyn_cast_or_null(DT.getConstant())) + addConstantFPValue(StaticMemberDIE, CFP); + + insertDIE(DT, StaticMemberDIE); + return StaticMemberDIE; +} diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h index fad9b6e06684..2b180c6cc3f4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.h @@ -15,26 +15,28 @@ #define CODEGEN_ASMPRINTER_DWARFCOMPILEUNIT_H #include "DIE.h" -#include "llvm/DebugInfo.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/StringMap.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/StringMap.h" +#include "llvm/DebugInfo.h" namespace llvm { class DwarfDebug; +class DwarfUnits; class MachineLocation; class MachineOperand; class ConstantInt; +class ConstantFP; class DbgVariable; //===----------------------------------------------------------------------===// /// CompileUnit - This dwarf writer support class manages information associated /// with a source file. class CompileUnit { - /// ID - File identifier for source. 
+ /// UniqueID - a numeric ID unique among all CUs in the module /// - unsigned ID; + unsigned UniqueID; /// Language - The DW_AT_language of the compile unit /// @@ -47,7 +49,9 @@ class CompileUnit { /// Asm - Target of Dwarf emission. AsmPrinter *Asm; + // Holders for some common dwarf information. DwarfDebug *DD; + DwarfUnits *DU; /// IndexTyDie - An anonymous type for index type. Owned by CUDie. DIE *IndexTyDie; @@ -60,6 +64,10 @@ class CompileUnit { /// descriptors to debug information entries using a DIEEntry proxy. DenseMap MDNodeToDIEEntryMap; + /// GlobalNames - A map of globally visible named entities for this unit. + /// + StringMap GlobalNames; + /// GlobalTypes - A map of globally visible types for this unit. /// StringMap GlobalTypes; @@ -79,14 +87,27 @@ class CompileUnit { /// corresponds to the MDNode mapped with the subprogram DIE. DenseMap ContainingTypeMap; + /// Offset of the CUDie from beginning of debug info section. + unsigned DebugInfoOffset; + + /// getLowerBoundDefault - Return the default lower bound for an array. If the + /// DWARF version doesn't handle the language, return -1. + int64_t getDefaultLowerBound() const; + + /// getOrCreateContextDIE - Get context owner's DIE. + DIE *getOrCreateContextDIE(DIDescriptor Context); + public: - CompileUnit(unsigned I, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW); + CompileUnit(unsigned UID, unsigned L, DIE *D, AsmPrinter *A, DwarfDebug *DW, + DwarfUnits *); ~CompileUnit(); // Accessors. - unsigned getID() const { return ID; } + unsigned getUniqueID() const { return UniqueID; } unsigned getLanguage() const { return Language; } DIE* getCUDie() const { return CUDie.get(); } + unsigned getDebugInfoOffset() const { return DebugInfoOffset; } + const StringMap &getGlobalNames() const { return GlobalNames; } const StringMap &getGlobalTypes() const { return GlobalTypes; } const StringMap > &getAccelNames() const { @@ -102,11 +123,16 @@ public: &getAccelTypes() const { return AccelTypes; } - + + void setDebugInfoOffset(unsigned DbgInfoOff) { DebugInfoOffset = DbgInfoOff; } /// hasContent - Return true if this compile unit has something to write out. /// bool hasContent() const { return !CUDie->getChildren().empty(); } + /// addGlobalName - Add a new global entity to the compile unit. + /// + void addGlobalName(StringRef Name, DIE *Die) { GlobalNames[Name] = Die; } + /// addGlobalType - Add a new global type to the compile unit. /// void addGlobalType(DIType Ty); @@ -129,12 +155,12 @@ public: std::vector > &DIEs = AccelTypes[Name]; DIEs.push_back(Die); } - + /// getDIE - Returns the debug information entry map slot for the /// specified debug variable. DIE *getDIE(const MDNode *N) { return MDNodeToDieMap.lookup(N); } - DIEBlock *getDIEBlock() { + DIEBlock *getDIEBlock() { return new (DIEValueAllocator) DIEBlock(); } @@ -174,11 +200,10 @@ public: void setIndexTyDie(DIE *D) { IndexTyDie = D; } -public: /// addFlag - Add a flag that is true to the DIE. void addFlag(DIE *Die, unsigned Attribute); - + /// addUInt - Add an unsigned integer attribute data and value. /// void addUInt(DIE *Die, unsigned Attribute, unsigned Form, uint64_t Integer); @@ -191,11 +216,25 @@ public: /// void addString(DIE *Die, unsigned Attribute, const StringRef Str); + /// addLocalString - Add a string attribute data and value. + /// + void addLocalString(DIE *Die, unsigned Attribute, const StringRef Str); + /// addLabel - Add a Dwarf label attribute data and value. 
/// void addLabel(DIE *Die, unsigned Attribute, unsigned Form, const MCSymbol *Label); + /// addLabelAddress - Add a dwarf label attribute data and value using + /// either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// + void addLabelAddress(DIE *Die, unsigned Attribute, MCSymbol *Label); + + /// addOpAddress - Add a dwarf op address data and value using the + /// form given and an op of either DW_FORM_addr or DW_FORM_GNU_addr_index. + /// + void addOpAddress(DIE *Die, MCSymbol *Label); + /// addDelta - Add a label delta attribute data and value. /// void addDelta(DIE *Die, unsigned Attribute, unsigned Form, @@ -204,7 +243,7 @@ public: /// addDIEEntry - Add a DIE attribute data and value. /// void addDIEEntry(DIE *Die, unsigned Attribute, unsigned Form, DIE *Entry); - + /// addBlock - Add block data. /// void addBlock(DIE *Die, unsigned Attribute, unsigned Form, DIEBlock *Block); @@ -226,9 +265,11 @@ public: /// addConstantValue - Add constant value entry in variable DIE. bool addConstantValue(DIE *Die, const MachineOperand &MO, DIType Ty); bool addConstantValue(DIE *Die, const ConstantInt *CI, bool Unsigned); + bool addConstantValue(DIE *Die, const APInt &Val, bool Unsigned); /// addConstantFPValue - Add constant value entry in variable DIE. bool addConstantFPValue(DIE *Die, const MachineOperand &MO); + bool addConstantFPValue(DIE *Die, const ConstantFP *CFP); /// addTemplateParams - Add template parameters in buffer. void addTemplateParams(DIE &Buffer, DIArray TParams); @@ -257,7 +298,7 @@ public: void addBlockByrefAddress(DbgVariable *&DV, DIE *Die, unsigned Attribute, const MachineLocation &Location); - /// addVariableAddress - Add DW_AT_location attribute for a + /// addVariableAddress - Add DW_AT_location attribute for a /// DbgVariable based on provided MachineLocation. void addVariableAddress(DbgVariable *&DV, DIE *Die, MachineLocation Location); @@ -279,7 +320,7 @@ public: /// given DIType. DIE *getOrCreateTypeDIE(const MDNode *N); - /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE + /// getOrCreateTemplateTypeParameterDIE - Find existing DIE or create new DIE /// for the given DITemplateTypeParameter. DIE *getOrCreateTemplateTypeParameterDIE(DITemplateTypeParameter TP); @@ -312,7 +353,7 @@ public: void constructSubrangeDIE(DIE &Buffer, DISubrange SR, DIE *IndexTy); /// constructArrayTypeDIE - Construct array type DIE from DICompositeType. - void constructArrayTypeDIE(DIE &Buffer, + void constructArrayTypeDIE(DIE &Buffer, DICompositeType *CTy); /// constructEnumTypeDIE - Construct enum type DIE from DIEnumerator. @@ -328,6 +369,9 @@ public: /// createMemberDIE - Create new member DIE. DIE *createMemberDIE(DIDerivedType DT); + /// createStaticMemberDIE - Create new static data member DIE. + DIE *createStaticMemberDIE(DIDerivedType DT); + private: // DIEValueAllocator - All DIEValues are allocated through this allocator. 
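The new addLabelAddress/addOpAddress declarations above let address-valued attributes be encoded either directly (DW_FORM_addr) or as an index into an address pool (DW_FORM_GNU_addr_index), which is what the split-DWARF prototype needs. The sketch below models that choice under the assumption that split mode routes every address through a pool keyed by symbol, using the same lazy-numbering scheme getAddrPoolIndex() uses later in the patch; the type and function names here are illustrative only:

    #include <cstdint>
    #include <map>
    #include <string>
    #include <utility>

    enum class AddrForm { Addr, GNUAddrIndex };

    struct AddressPool {
      std::map<std::string, unsigned> Index; // symbol name -> pool slot
      unsigned Next = 0;
      unsigned getIndex(const std::string &Sym) {
        auto It = Index.find(Sym);
        if (It != Index.end())
          return It->second;          // reuse the slot from the first use
        return Index[Sym] = Next++;   // first use claims the next slot
      }
    };

    // Non-split: the DIE carries the relocated address itself. Split: it
    // carries a small integer that is resolved through the address table
    // kept with the skeleton in the object file.
    std::pair<AddrForm, uint64_t>
    encodeAddress(bool SplitDwarf, AddressPool &Pool,
                  const std::string &Sym, uint64_t RawAddr) {
      if (!SplitDwarf)
        return {AddrForm::Addr, RawAddr};
      return {AddrForm::GNUAddrIndex, Pool.getIndex(Sym)};
    }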
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp index 367b52307925..d3cb4f9c1c0e 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp @@ -16,34 +16,34 @@ #include "DIE.h" #include "DwarfAccelTable.h" #include "DwarfCompileUnit.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DIBuilder.h" -#include "llvm/Module.h" -#include "llvm/Instructions.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DIBuilder.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Triple.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/ValueHandle.h" #include "llvm/Support/FormattedStream.h" -#include "llvm/Support/Timer.h" #include "llvm/Support/Path.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/ValueHandle.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt DisableDebugInfoPrinting("disable-debug-info-print", @@ -54,6 +54,10 @@ static cl::opt UnknownLocations("use-unknown-locations", cl::Hidden, cl::desc("Make an absence of debug location information explicit."), cl::init(false)); +static cl::opt GenerateDwarfPubNamesSection("generate-dwarf-pubnames", + cl::Hidden, cl::init(false), + cl::desc("Generate DWARF pubnames section")); + namespace { enum DefaultOnOff { Default, Enable, Disable @@ -78,6 +82,15 @@ static cl::opt DarwinGDBCompat("darwin-gdb-compat", cl::Hidden, clEnumValEnd), cl::init(Default)); +static cl::opt SplitDwarf("split-dwarf", cl::Hidden, + cl::desc("Output prototype dwarf split debug info."), + cl::values( + clEnumVal(Default, "Default for platform"), + clEnumVal(Enable, "Enabled"), + clEnumVal(Disable, "Disabled"), + clEnumValEnd), + cl::init(Default)); + namespace { const char *DWARFGroupName = "DWARF Emission"; const char *DbgTimerName = "DWARF Debug Writer"; @@ -85,8 +98,8 @@ namespace { //===----------------------------------------------------------------------===// -/// Configuration values for initial hash set sizes (log2). -/// +// Configuration values for initial hash set sizes (log2). +// static const unsigned InitAbbreviationsSetSize = 9; // log2(512) namespace llvm { @@ -102,19 +115,19 @@ DIType DbgVariable::getType() const { either the struct, or a pointer to the struct, as its type. This is necessary for various behind-the-scenes things the compiler needs to do with by-reference variables in blocks. 
- + However, as far as the original *programmer* is concerned, the variable should still have type 'SomeType', as originally declared. - + The following function dives into the __Block_byref_x_VarName struct to find the original type of the variable. This will be passed back to the code generating the type for the Debug Information Entry for the variable 'VarName'. 'VarName' will then have the original type 'SomeType' in its debug information. - + The original type 'SomeType' will be the type of the field named 'VarName' inside the __Block_byref_x_VarName struct. - + NOTE: In order for this to not completely fail on the debugger side, the Debug Information Entry for the variable VarName needs to have a DW_AT_location that tells the debugger how to unwind through @@ -122,15 +135,15 @@ DIType DbgVariable::getType() const { value of the variable. The function addBlockByrefType does this. */ DIType subType = Ty; unsigned tag = Ty.getTag(); - + if (tag == dwarf::DW_TAG_pointer_type) { DIDerivedType DTy = DIDerivedType(Ty); subType = DTy.getTypeDerivedFrom(); } - + DICompositeType blockStruct = DICompositeType(subType); DIArray Elements = blockStruct.getTypeArray(); - + for (unsigned i = 0, N = Elements.getNumElements(); i < N; ++i) { DIDescriptor Element = Elements.getElement(i); DIDerivedType DT = DIDerivedType(Element); @@ -146,45 +159,55 @@ DIType DbgVariable::getType() const { DwarfDebug::DwarfDebug(AsmPrinter *A, Module *M) : Asm(A), MMI(Asm->MMI), FirstCU(0), AbbreviationsSet(InitAbbreviationsSetSize), - SourceIdMap(DIEValueAllocator), StringPool(DIEValueAllocator), - PrevLabel(NULL) { - NextStringPoolNumber = 0; + SourceIdMap(DIEValueAllocator), + PrevLabel(NULL), GlobalCUIndexCount(0), + InfoHolder(A, &AbbreviationsSet, &Abbreviations, "info_string", + DIEValueAllocator), + SkeletonAbbrevSet(InitAbbreviationsSetSize), + SkeletonHolder(A, &SkeletonAbbrevSet, &SkeletonAbbrevs, "skel_string", + DIEValueAllocator) { DwarfInfoSectionSym = DwarfAbbrevSectionSym = 0; DwarfStrSectionSym = TextSectionSym = 0; - DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = 0; + DwarfDebugRangeSectionSym = DwarfDebugLocSectionSym = DwarfLineSectionSym = 0; + DwarfAbbrevDWOSectionSym = DwarfStrDWOSectionSym = 0; FunctionBeginSym = FunctionEndSym = 0; // Turn on accelerator tables and older gdb compatibility // for Darwin. - bool isDarwin = Triple(M->getTargetTriple()).isOSDarwin(); + bool IsDarwin = Triple(M->getTargetTriple()).isOSDarwin(); if (DarwinGDBCompat == Default) { - if (isDarwin) - isDarwinGDBCompat = true; + if (IsDarwin) + IsDarwinGDBCompat = true; else - isDarwinGDBCompat = false; + IsDarwinGDBCompat = false; } else - isDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; + IsDarwinGDBCompat = DarwinGDBCompat == Enable ? true : false; if (DwarfAccelTables == Default) { - if (isDarwin) - hasDwarfAccelTables = true; + if (IsDarwin) + HasDwarfAccelTables = true; else - hasDwarfAccelTables = false; + HasDwarfAccelTables = false; } else - hasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + HasDwarfAccelTables = DwarfAccelTables == Enable ? true : false; + + if (SplitDwarf == Default) + HasSplitDwarf = false; + else + HasSplitDwarf = SplitDwarf == Enable ? true : false; { NamedRegionTimer T(DbgTimerName, DWARFGroupName, TimePassesIsEnabled); - beginModule(M); + beginModule(); } } DwarfDebug::~DwarfDebug() { } -/// EmitSectionSym - Switch to the specified MCSection and emit an assembler -/// temporary label to it if SymbolStem is specified. 
-static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, +// Switch to the specified MCSection and emit an assembler +// temporary label to it if SymbolStem is specified. +static MCSymbol *emitSectionSym(AsmPrinter *Asm, const MCSection *Section, const char *SymbolStem = 0) { Asm->OutStreamer.SwitchSection(Section); if (!SymbolStem) return 0; @@ -194,44 +217,64 @@ static MCSymbol *EmitSectionSym(AsmPrinter *Asm, const MCSection *Section, return TmpSym; } -MCSymbol *DwarfDebug::getStringPool() { - return Asm->GetTempSymbol("section_str"); +MCSymbol *DwarfUnits::getStringPoolSym() { + return Asm->GetTempSymbol(StringPref); } -MCSymbol *DwarfDebug::getStringPoolEntry(StringRef Str) { - std::pair &Entry = StringPool[Str]; +MCSymbol *DwarfUnits::getStringPoolEntry(StringRef Str) { + std::pair &Entry = + StringPool.GetOrCreateValue(Str).getValue(); if (Entry.first) return Entry.first; Entry.second = NextStringPoolNumber++; - return Entry.first = Asm->GetTempSymbol("string", Entry.second); + return Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); } -/// assignAbbrevNumber - Define a unique number for the abbreviation. -/// -void DwarfDebug::assignAbbrevNumber(DIEAbbrev &Abbrev) { +unsigned DwarfUnits::getStringPoolIndex(StringRef Str) { + std::pair &Entry = + StringPool.GetOrCreateValue(Str).getValue(); + if (Entry.first) return Entry.second; + + Entry.second = NextStringPoolNumber++; + Entry.first = Asm->GetTempSymbol(StringPref, Entry.second); + return Entry.second; +} + +unsigned DwarfUnits::getAddrPoolIndex(MCSymbol *Sym) { + std::pair &Entry = AddressPool[Sym]; + if (Entry.first) return Entry.second; + + Entry.second = NextAddrPoolNumber++; + Entry.first = Sym; + return Entry.second; +} + +// Define a unique number for the abbreviation. +// +void DwarfUnits::assignAbbrevNumber(DIEAbbrev &Abbrev) { // Profile the node so that we can make it unique. FoldingSetNodeID ID; Abbrev.Profile(ID); // Check the set for priors. - DIEAbbrev *InSet = AbbreviationsSet.GetOrInsertNode(&Abbrev); + DIEAbbrev *InSet = AbbreviationsSet->GetOrInsertNode(&Abbrev); // If it's newly added. if (InSet == &Abbrev) { // Add to abbreviation list. - Abbreviations.push_back(&Abbrev); + Abbreviations->push_back(&Abbrev); // Assign the vector position + 1 as its number. - Abbrev.setNumber(Abbreviations.size()); + Abbrev.setNumber(Abbreviations->size()); } else { // Assign existing abbreviation number. Abbrev.setNumber(InSet->getNumber()); } } -/// getRealLinkageName - If special LLVM prefix that is used to inform the asm -/// printer to not emit usual symbol prefix before the symbol name is used then -/// return linkage name after skipping this special LLVM prefix. +// If special LLVM prefix that is used to inform the asm +// printer to not emit usual symbol prefix before the symbol name is used then +// return linkage name after skipping this special LLVM prefix. 
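getRealLinkageName(), whose comment closes the hunk above, undoes the '\1' marker LLVM prepends to linkage names that must bypass the assembler's usual symbol prefixing; DWARF wants the bare name. A tiny standalone equivalent, with std::string standing in for StringRef:

    #include <string>

    // Strip the leading '\1' byte, if present, and return the name DWARF
    // should record; otherwise return the linkage name unchanged.
    std::string realLinkageName(const std::string &LinkageName) {
      if (!LinkageName.empty() && LinkageName[0] == '\1')
        return LinkageName.substr(1);
      return LinkageName;
    }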
static StringRef getRealLinkageName(StringRef LinkageName) { char One = '\1'; if (LinkageName.startswith(StringRef(&One, 1))) @@ -275,7 +318,7 @@ static StringRef getObjCMethodName(StringRef In) { static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, DIE* Die) { if (!SP.isDefinition()) return; - + TheCU->addAccelName(SP.getName(), Die); // If the linkage name is different than the name, go ahead and output @@ -296,10 +339,9 @@ static void addSubprogramNames(CompileUnit *TheCU, DISubprogram SP, } } -/// updateSubprogramScopeDIE - Find DIE for the given subprogram and -/// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. -/// If there are global variables in this scope then create and insert -/// DIEs for these variables. +// Find DIE for the given subprogram and attach appropriate DW_AT_low_pc +// and DW_AT_high_pc attributes. If there are global variables in this +// scope then create and insert DIEs for these variables. DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode) { DIE *SPDie = SPCU->getDIE(SPNode); @@ -310,11 +352,16 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // If we're updating an abstract DIE, then we will be adding the children and // object pointer later on. But what we don't want to do is process the // concrete DIE twice. - if (DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode)) { + DIE *AbsSPDIE = AbstractSPDies.lookup(SPNode); + if (AbsSPDIE) { + bool InSameCU = (AbsSPDIE->getCompileUnit() == SPCU->getCUDie()); // Pick up abstract subprogram DIE. SPDie = new DIE(dwarf::DW_TAG_subprogram); + // If AbsSPDIE belongs to a different CU, use DW_FORM_ref_addr instead of + // DW_FORM_ref4. SPCU->addDIEEntry(SPDie, dwarf::DW_AT_abstract_origin, - dwarf::DW_FORM_ref4, AbsSPDIE); + InSameCU ? dwarf::DW_FORM_ref4 : dwarf::DW_FORM_ref_addr, + AbsSPDIE); SPCU->addDie(SPDie); } else { DISubprogram SPDecl = SP.getFunctionDeclaration(); @@ -347,17 +394,19 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, } DIE *SPDeclDie = SPDie; SPDie = new DIE(dwarf::DW_TAG_subprogram); - SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, dwarf::DW_FORM_ref4, - SPDeclDie); + SPCU->addDIEEntry(SPDie, dwarf::DW_AT_specification, + dwarf::DW_FORM_ref4, SPDeclDie); SPCU->addDie(SPDie); } } } - SPCU->addLabel(SPDie, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber())); - SPCU->addLabel(SPDie, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - Asm->GetTempSymbol("func_end", Asm->getFunctionNumber())); + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_low_pc, + Asm->GetTempSymbol("func_begin", + Asm->getFunctionNumber())); + SPCU->addLabelAddress(SPDie, dwarf::DW_AT_high_pc, + Asm->GetTempSymbol("func_end", + Asm->getFunctionNumber())); const TargetRegisterInfo *RI = Asm->TM.getRegisterInfo(); MachineLocation Location(RI->getFrameRegister(*Asm->MF)); SPCU->addAddress(SPDie, dwarf::DW_AT_frame_base, Location); @@ -365,13 +414,13 @@ DIE *DwarfDebug::updateSubprogramScopeDIE(CompileUnit *SPCU, // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_subprogram nodes. addSubprogramNames(SPCU, SP, SPDie); - + return SPDie; } -/// constructLexicalScope - Construct new DW_TAG_lexical_block -/// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. -DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, +// Construct new DW_TAG_lexical_block for this scope and attach +// DW_AT_low_pc/DW_AT_high_pc labels. 
+DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { DIE *ScopeDIE = new DIE(dwarf::DW_TAG_lexical_block); if (Scope->isAbstractScope()) @@ -387,7 +436,7 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() + DebugRangeSymbols.size() * Asm->getDataLayout().getPointerSize()); for (SmallVector::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { @@ -399,23 +448,22 @@ DIE *DwarfDebug::constructLexicalScopeDIE(CompileUnit *TheCU, return ScopeDIE; } - const MCSymbol *Start = getLabelBeforeInsn(RI->first); - const MCSymbol *End = getLabelAfterInsn(RI->second); + MCSymbol *Start = getLabelBeforeInsn(RI->first); + MCSymbol *End = getLabelAfterInsn(RI->second); if (End == 0) return 0; assert(Start->isDefined() && "Invalid starting label for an inlined scope!"); assert(End->isDefined() && "Invalid end label for an inlined scope!"); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, Start); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, End); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, Start); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, End); return ScopeDIE; } -/// constructInlinedScopeDIE - This scope represents inlined body of -/// a function. Construct DIE to represent this concrete inlined copy -/// of the function. +// This scope represents inlined body of a function. Construct DIE to +// represent this concrete inlined copy of the function. DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { const SmallVector &Ranges = Scope->getRanges(); @@ -433,8 +481,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, } SmallVector::const_iterator RI = Ranges.begin(); - const MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); - const MCSymbol *EndLabel = getLabelAfterInsn(RI->second); + MCSymbol *StartLabel = getLabelBeforeInsn(RI->first); + MCSymbol *EndLabel = getLabelAfterInsn(RI->second); if (StartLabel == 0 || EndLabel == 0) { llvm_unreachable("Unexpected Start and End labels for an inlined scope!"); @@ -453,7 +501,7 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, // .debug_range as a uint, size 4, for now. emitDIE will handle // DW_AT_ranges appropriately. 
TheCU->addUInt(ScopeDIE, dwarf::DW_AT_ranges, dwarf::DW_FORM_data4, - DebugRangeSymbols.size() + DebugRangeSymbols.size() * Asm->getDataLayout().getPointerSize()); for (SmallVector::const_iterator RI = Ranges.begin(), RE = Ranges.end(); RI != RE; ++RI) { @@ -463,10 +511,8 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DebugRangeSymbols.push_back(NULL); DebugRangeSymbols.push_back(NULL); } else { - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, - StartLabel); - TheCU->addLabel(ScopeDIE, dwarf::DW_AT_high_pc, dwarf::DW_FORM_addr, - EndLabel); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_low_pc, StartLabel); + TheCU->addLabelAddress(ScopeDIE, dwarf::DW_AT_high_pc, EndLabel); } InlinedSubprogramDIEs.insert(OriginDIE); @@ -487,21 +533,28 @@ DIE *DwarfDebug::constructInlinedScopeDIE(CompileUnit *TheCU, DILocation DL(Scope->getInlinedAt()); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_file, 0, - GetOrCreateSourceID(DL.getFilename(), DL.getDirectory())); + getOrCreateSourceID(DL.getFilename(), DL.getDirectory(), + TheCU->getUniqueID())); TheCU->addUInt(ScopeDIE, dwarf::DW_AT_call_line, 0, DL.getLineNumber()); // Add name to the name table, we do this here because we're guaranteed // to have concrete versions of our DW_TAG_inlined_subprogram nodes. addSubprogramNames(TheCU, InlinedSP, ScopeDIE); - + return ScopeDIE; } -/// constructScopeDIE - Construct a DIE for this scope. +// Construct a DIE for this scope. DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (!Scope || !Scope->getScopeNode()) return NULL; + DIScope DS(Scope->getScopeNode()); + // Early return to avoid creating dangling variable|scope DIEs. + if (!Scope->getInlinedAt() && DS.isSubprogram() && Scope->isAbstractScope() && + !TheCU->getDIE(DS)) + return NULL; + SmallVector Children; DIE *ObjectPointer = NULL; @@ -509,7 +562,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { if (LScopes.isCurrentFunctionScope(Scope)) for (unsigned i = 0, N = CurrentFnArguments.size(); i < N; ++i) if (DbgVariable *ArgDV = CurrentFnArguments[i]) - if (DIE *Arg = + if (DIE *Arg = TheCU->constructVariableDIE(ArgDV, Scope->isAbstractScope())) { Children.push_back(Arg); if (ArgDV->isObjectPointer()) ObjectPointer = Arg; @@ -518,7 +571,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { // Collect lexical scope children first. 
const SmallVector &Variables = ScopeVariables.lookup(Scope); for (unsigned i = 0, N = Variables.size(); i < N; ++i) - if (DIE *Variable = + if (DIE *Variable = TheCU->constructVariableDIE(Variables[i], Scope->isAbstractScope())) { Children.push_back(Variable); if (Variables[i]->isObjectPointer()) ObjectPointer = Variable; @@ -527,7 +580,6 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { for (unsigned j = 0, M = Scopes.size(); j < M; ++j) if (DIE *Nested = constructScopeDIE(TheCU, Scopes[j])) Children.push_back(Nested); - DIScope DS(Scope->getScopeNode()); DIE *ScopeDIE = NULL; if (Scope->getInlinedAt()) ScopeDIE = constructInlinedScopeDIE(TheCU, Scope); @@ -548,7 +600,7 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { return NULL; ScopeDIE = constructLexicalScopeDIE(TheCU, Scope); } - + if (!ScopeDIE) return NULL; // Add children @@ -566,24 +618,33 @@ DIE *DwarfDebug::constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope) { return ScopeDIE; } -/// GetOrCreateSourceID - Look up the source id with the given directory and -/// source file names. If none currently exists, create a new id and insert it -/// in the SourceIds map. This can update DirectoryNames and SourceFileNames -/// maps as well. -unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, - StringRef DirName) { +// Look up the source id with the given directory and source file names. +// If none currently exists, create a new id and insert it in the +// SourceIds map. This can update DirectoryNames and SourceFileNames maps +// as well. +unsigned DwarfDebug::getOrCreateSourceID(StringRef FileName, + StringRef DirName, unsigned CUID) { + // If we use .loc in assembly, we can't separate .file entries according to + // compile units. Thus all files will belong to the default compile unit. + if (Asm->TM.hasMCUseLoc() && + Asm->OutStreamer.getKind() == MCStreamer::SK_AsmStreamer) + CUID = 0; + // If FE did not provide a file name, then assume stdin. if (FileName.empty()) - return GetOrCreateSourceID("", StringRef()); + return getOrCreateSourceID("", StringRef(), CUID); // TODO: this might not belong here. See if we can factor this better. if (DirName == CompilationDir) DirName = ""; - unsigned SrcId = SourceIdMap.size()+1; + // FileIDCUMap stores the current ID for the given compile unit. + unsigned SrcId = FileIDCUMap[CUID] + 1; - // We look up the file/dir pair by concatenating them with a zero byte. + // We look up the CUID/file/dir by concatenating them with a zero byte. SmallString<128> NamePair; + NamePair += CUID; + NamePair += '\0'; NamePair += DirName; NamePair += '\0'; // Zero bytes are not allowed in paths. NamePair += FileName; @@ -592,37 +653,57 @@ unsigned DwarfDebug::GetOrCreateSourceID(StringRef FileName, if (Ent.getValue() != SrcId) return Ent.getValue(); + FileIDCUMap[CUID] = SrcId; // Print out a .file directive to specify files for .loc directives. - Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName); + Asm->OutStreamer.EmitDwarfFileDirective(SrcId, DirName, FileName, CUID); return SrcId; } -/// constructCompileUnit - Create new CompileUnit for the given -/// metadata node with tag DW_TAG_compile_unit. +// Create new CompileUnit for the given metadata node with tag +// DW_TAG_compile_unit. 
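getOrCreateSourceID() now numbers files per compile unit: the lookup key concatenates the CU id, directory and file name with zero bytes (which cannot appear in paths), FileIDCUMap hands out ids starting at 1 within each CU, and everything collapses into CU 0 when .loc/.file directives go through the assembly streamer. A simplified standalone version of that keying scheme; the MC streamer and the .file emission are left out, and the names are illustrative:

    #include <map>
    #include <string>

    struct SourceIdTable {
      std::map<std::string, unsigned> IdByKey; // composite key -> file id
      std::map<unsigned, unsigned> LastIdByCU; // CU id -> last id handed out

      unsigned getOrCreate(unsigned CUID, std::string Dir, std::string File,
                           bool AsmStreamerUsesLoc) {
        // .file entries cannot be separated per CU when .loc is used in
        // textual assembly, so fold everything into the default unit.
        if (AsmStreamerUsesLoc)
          CUID = 0;
        if (File.empty())
          File = "<stdin>";                  // front end gave us no file name
        // Zero bytes never occur in paths, so they are a safe separator.
        std::string Key = std::to_string(CUID) + '\0' + Dir + '\0' + File;
        auto It = IdByKey.find(Key);
        if (It != IdByKey.end())
          return It->second;                 // same CU/dir/file seen before
        unsigned Id = ++LastIdByCU[CUID];    // per-CU numbering starts at 1
        IdByKey[Key] = Id;
        return Id;                           // caller emits ".file Id ..." here
      }
    };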
CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { DICompileUnit DIUnit(N); StringRef FN = DIUnit.getFilename(); CompilationDir = DIUnit.getDirectory(); - unsigned ID = GetOrCreateSourceID(FN, CompilationDir); DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); - CompileUnit *NewCU = new CompileUnit(ID, DIUnit.getLanguage(), Die, - Asm, this); + CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, + DIUnit.getLanguage(), Die, Asm, + this, &InfoHolder); + + FileIDCUMap[NewCU->getUniqueID()] = 0; + // Call this to emit a .file directive if it wasn't emitted for the source + // file this CU comes from yet. + getOrCreateSourceID(FN, CompilationDir, NewCU->getUniqueID()); + NewCU->addString(Die, dwarf::DW_AT_producer, DIUnit.getProducer()); NewCU->addUInt(Die, dwarf::DW_AT_language, dwarf::DW_FORM_data2, DIUnit.getLanguage()); NewCU->addString(Die, dwarf::DW_AT_name, FN); // 2.17.1 requires that we use DW_AT_low_pc for a single entry point - // into an entity. - NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + // into an entity. We're using 0 (or a NULL label) for this. + NewCU->addLabelAddress(Die, dwarf::DW_AT_low_pc, NULL); + + // Define start line table label for each Compile Unit. + MCSymbol *LineTableStartSym = Asm->GetTempSymbol("line_table_start", + NewCU->getUniqueID()); + Asm->OutStreamer.getContext().setMCLineTableSymbol(LineTableStartSym, + NewCU->getUniqueID()); + // DW_AT_stmt_list is a offset of line number information for this // compile unit in debug_line section. + // The line table entries are not always emitted in assembly, so it + // is not okay to use line_table_start here. if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, - Asm->GetTempSymbol("section_line")); - else + NewCU->getUniqueID() == 0 ? + Asm->GetTempSymbol("section_line") : LineTableStartSym); + else if (NewCU->getUniqueID() == 0) NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, 0); + else + NewCU->addDelta(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_data4, + LineTableStartSym, DwarfLineSectionSym); if (!CompilationDir.empty()) NewCU->addString(Die, dwarf::DW_AT_comp_dir, CompilationDir); @@ -632,19 +713,22 @@ CompileUnit *DwarfDebug::constructCompileUnit(const MDNode *N) { StringRef Flags = DIUnit.getFlags(); if (!Flags.empty()) NewCU->addString(Die, dwarf::DW_AT_APPLE_flags, Flags); - + if (unsigned RVer = DIUnit.getRunTimeVersion()) NewCU->addUInt(Die, dwarf::DW_AT_APPLE_major_runtime_vers, dwarf::DW_FORM_data1, RVer); if (!FirstCU) FirstCU = NewCU; + + InfoHolder.addUnit(NewCU); + CUMap.insert(std::make_pair(N, NewCU)); return NewCU; } -/// construct SubprogramDIE - Construct subprogram DIE. -void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, +// Construct subprogram DIE. +void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N) { CompileUnit *&CURef = SPMap[N]; if (CURef) @@ -665,133 +749,83 @@ void DwarfDebug::constructSubprogramDIE(CompileUnit *TheCU, // Add to context owner. 
TheCU->addToContextOwner(SubprogramDie, SP.getContext()); - return; -} - -/// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such -/// as llvm.dbg.enum and llvm.dbg.ty -void DwarfDebug::collectInfoFromNamedMDNodes(Module *M) { - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.sp")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - const MDNode *N = NMD->getOperand(i); - if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit())) - constructSubprogramDIE(CU, N); - } - - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.gv")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - const MDNode *N = NMD->getOperand(i); - if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit())) - CU->createGlobalVariableDIE(N); - } - - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.enum")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIType Ty(NMD->getOperand(i)); - if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit())) - CU->getOrCreateTypeDIE(Ty); - } - - if (NamedMDNode *NMD = M->getNamedMetadata("llvm.dbg.ty")) - for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { - DIType Ty(NMD->getOperand(i)); - if (CompileUnit *CU = CUMap.lookup(Ty.getCompileUnit())) - CU->getOrCreateTypeDIE(Ty); - } -} - -/// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder. -/// FIXME - Remove this when dragon-egg and llvm-gcc switch to DIBuilder. -bool DwarfDebug::collectLegacyDebugInfo(Module *M) { - DebugInfoFinder DbgFinder; - DbgFinder.processModule(*M); - - bool HasDebugInfo = false; - // Scan all the compile-units to see if there are any marked as the main - // unit. If not, we do not generate debug info. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) { - if (DICompileUnit(*I).isMain()) { - HasDebugInfo = true; - break; - } - } - if (!HasDebugInfo) return false; - - // Create all the compile unit DIEs. - for (DebugInfoFinder::iterator I = DbgFinder.compile_unit_begin(), - E = DbgFinder.compile_unit_end(); I != E; ++I) - constructCompileUnit(*I); - - // Create DIEs for each global variable. - for (DebugInfoFinder::iterator I = DbgFinder.global_variable_begin(), - E = DbgFinder.global_variable_end(); I != E; ++I) { - const MDNode *N = *I; - if (CompileUnit *CU = CUMap.lookup(DIGlobalVariable(N).getCompileUnit())) - CU->createGlobalVariableDIE(N); - } - - // Create DIEs for each subprogram. - for (DebugInfoFinder::iterator I = DbgFinder.subprogram_begin(), - E = DbgFinder.subprogram_end(); I != E; ++I) { - const MDNode *N = *I; - if (CompileUnit *CU = CUMap.lookup(DISubprogram(N).getCompileUnit())) - constructSubprogramDIE(CU, N); - } - - return HasDebugInfo; + // Expose as global, if requested. + if (GenerateDwarfPubNamesSection) + TheCU->addGlobalName(SP.getName(), SubprogramDie); } -/// beginModule - Emit all Dwarf sections that should come prior to the -/// content. Create global DIEs and emit initial debug info sections. -/// This is invoked by the target AsmPrinter. -void DwarfDebug::beginModule(Module *M) { +// Emit all Dwarf sections that should come prior to the content. Create +// global DIEs and emit initial debug info sections. This is invoked by +// the target AsmPrinter. 
+void DwarfDebug::beginModule() { if (DisableDebugInfoPrinting) return; + const Module *M = MMI->getModule(); + // If module has named metadata anchors then use them, otherwise scan the // module using debug info finder to collect debug info. NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu"); - if (CU_Nodes) { - for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { - DICompileUnit CUNode(CU_Nodes->getOperand(i)); - CompileUnit *CU = constructCompileUnit(CUNode); - DIArray GVs = CUNode.getGlobalVariables(); - for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) - CU->createGlobalVariableDIE(GVs.getElement(i)); - DIArray SPs = CUNode.getSubprograms(); - for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) - constructSubprogramDIE(CU, SPs.getElement(i)); - DIArray EnumTypes = CUNode.getEnumTypes(); - for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) - CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); - DIArray RetainedTypes = CUNode.getRetainedTypes(); - for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) - CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); - } - } else if (!collectLegacyDebugInfo(M)) + if (!CU_Nodes) return; - collectInfoFromNamedMDNodes(M); - + // Emit initial sections so we can reference labels later. + emitSectionLabels(); + + for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { + DICompileUnit CUNode(CU_Nodes->getOperand(i)); + CompileUnit *CU = constructCompileUnit(CUNode); + DIArray GVs = CUNode.getGlobalVariables(); + for (unsigned i = 0, e = GVs.getNumElements(); i != e; ++i) + CU->createGlobalVariableDIE(GVs.getElement(i)); + DIArray SPs = CUNode.getSubprograms(); + for (unsigned i = 0, e = SPs.getNumElements(); i != e; ++i) + constructSubprogramDIE(CU, SPs.getElement(i)); + DIArray EnumTypes = CUNode.getEnumTypes(); + for (unsigned i = 0, e = EnumTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(EnumTypes.getElement(i)); + DIArray RetainedTypes = CUNode.getRetainedTypes(); + for (unsigned i = 0, e = RetainedTypes.getNumElements(); i != e; ++i) + CU->getOrCreateTypeDIE(RetainedTypes.getElement(i)); + // If we're splitting the dwarf out now that we've got the entire + // CU then construct a skeleton CU based upon it. + if (useSplitDwarf()) { + // This should be a unique identifier when we want to build .dwp files. + CU->addUInt(CU->getCUDie(), dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + // Now construct the skeleton CU associated. + constructSkeletonCU(CUNode); + } + } + // Tell MMI that we have debug info. MMI->setDebugInfoAvailability(true); - - // Emit initial sections. - EmitSectionLabels(); // Prime section data. SectionMap.insert(Asm->getObjFileLowering().getTextSection()); } -/// endModule - Emit all Dwarf sections that should come after the content. -/// -void DwarfDebug::endModule() { - if (!FirstCU) return; +// Attach DW_AT_inline attribute with inlined subprogram DIEs. +void DwarfDebug::computeInlinedDIEs() { + // Attach DW_AT_inline attribute with inlined subprogram DIEs. 
+ for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), + AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { + DIE *ISP = *AI; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } + for (DenseMap::iterator AI = AbstractSPDies.begin(), + AE = AbstractSPDies.end(); AI != AE; ++AI) { + DIE *ISP = AI->second; + if (InlinedSubprogramDIEs.count(ISP)) + continue; + FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); + } +} + +// Collect info for variables that were optimized out. +void DwarfDebug::collectDeadVariables() { const Module *M = MMI->getModule(); DenseMap DeadFnScopeMap; - // Collect info for variables that were optimized out. if (NamedMDNode *CU_Nodes = M->getNamedMetadata("llvm.dbg.cu")) { for (unsigned i = 0, e = CU_Nodes->getNumOperands(); i != e; ++i) { DICompileUnit TheCU(CU_Nodes->getOperand(i)); @@ -804,7 +838,7 @@ void DwarfDebug::endModule() { DIArray Variables = SP.getVariables(); if (Variables.getNumElements() == 0) continue; - LexicalScope *Scope = + LexicalScope *Scope = new LexicalScope(NULL, DIDescriptor(SP), NULL, false); DeadFnScopeMap[SP] = Scope; @@ -817,27 +851,22 @@ void DwarfDebug::endModule() { DIVariable DV(Variables.getElement(vi)); if (!DV.Verify()) continue; DbgVariable *NewVar = new DbgVariable(DV, NULL); - if (DIE *VariableDIE = + if (DIE *VariableDIE = SPCU->constructVariableDIE(NewVar, Scope->isAbstractScope())) ScopeDIE->addChild(VariableDIE); } } } } + DeleteContainerSeconds(DeadFnScopeMap); +} + +void DwarfDebug::finalizeModuleInfo() { + // Collect info for variables that were optimized out. + collectDeadVariables(); // Attach DW_AT_inline attribute with inlined subprogram DIEs. - for (SmallPtrSet::iterator AI = InlinedSubprogramDIEs.begin(), - AE = InlinedSubprogramDIEs.end(); AI != AE; ++AI) { - DIE *ISP = *AI; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); - } - for (DenseMap::iterator AI = AbstractSPDies.begin(), - AE = AbstractSPDies.end(); AI != AE; ++AI) { - DIE *ISP = AI->second; - if (InlinedSubprogramDIEs.count(ISP)) - continue; - FirstCU->addUInt(ISP, dwarf::DW_AT_inline, 0, dwarf::DW_INL_inlined); - } + computeInlinedDIEs(); // Emit DW_AT_containing_type attribute to connect types with their // vtable holding type. @@ -847,6 +876,13 @@ void DwarfDebug::endModule() { TheCU->constructContainingTypeDIEs(); } + // Compute DIE offsets and sizes. + InfoHolder.computeSizeAndOffsets(); + if (useSplitDwarf()) + SkeletonHolder.computeSizeAndOffsets(); +} + +void DwarfDebug::endSections() { // Standard sections final addresses. Asm->OutStreamer.SwitchSection(Asm->getObjFileLowering().getTextSection()); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("text_end")); @@ -858,15 +894,79 @@ void DwarfDebug::endModule() { Asm->OutStreamer.SwitchSection(SectionMap[I]); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("section_end", I+1)); } +} + +// Emit all Dwarf sections that should come after the content. +void DwarfDebug::endModule() { - // Compute DIE offsets and sizes. - computeSizeAndOffsets(); + if (!FirstCU) return; + + // End any existing sections. + // TODO: Does this need to happen? + endSections(); + + // Finalize the debug info for the module. + finalizeModuleInfo(); + + if (!useSplitDwarf()) { + // Emit all the DIEs into a debug info section. + emitDebugInfo(); + + // Corresponding abbreviations into a abbrev section. + emitAbbreviations(); - // Emit all the DIEs into a debug info section - emitDebugInfo(); + // Emit info into a debug loc section. 
+ emitDebugLoc(); + + // Emit info into a debug aranges section. + emitDebugARanges(); + + // Emit info into a debug ranges section. + emitDebugRanges(); + + // Emit info into a debug macinfo section. + emitDebugMacInfo(); + + // Emit inline info. + // TODO: When we don't need the option anymore we + // can remove all of the code that this section + // depends upon. + if (useDarwinGDBCompat()) + emitDebugInlineInfo(); + } else { + // TODO: Fill this in for separated debug sections and separate + // out information into new sections. - // Corresponding abbreviations into a abbrev section. - emitAbbreviations(); + // Emit the debug info section and compile units. + emitDebugInfo(); + emitDebugInfoDWO(); + + // Corresponding abbreviations into a abbrev section. + emitAbbreviations(); + emitDebugAbbrevDWO(); + + // Emit info into a debug loc section. + emitDebugLoc(); + + // Emit info into a debug aranges section. + emitDebugARanges(); + + // Emit info into a debug ranges section. + emitDebugRanges(); + + // Emit info into a debug macinfo section. + emitDebugMacInfo(); + + // Emit DWO addresses. + InfoHolder.emitAddresses(Asm->getObjFileLowering().getDwarfAddrSection()); + + // Emit inline info. + // TODO: When we don't need the option anymore we + // can remove all of the code that this section + // depends upon. + if (useDarwinGDBCompat()) + emitDebugInlineInfo(); + } // Emit info into the dwarf accelerator table sections. if (useDwarfAccelTables()) { @@ -875,45 +975,37 @@ void DwarfDebug::endModule() { emitAccelNamespaces(); emitAccelTypes(); } - + + // Emit info into a debug pubnames section, if requested. + if (GenerateDwarfPubNamesSection) + emitDebugPubnames(); + // Emit info into a debug pubtypes section. // TODO: When we don't need the option anymore we can // remove all of the code that adds to the table. if (useDarwinGDBCompat()) emitDebugPubTypes(); - // Emit info into a debug loc section. - emitDebugLoc(); - - // Emit info into a debug aranges section. - EmitDebugARanges(); - - // Emit info into a debug ranges section. - emitDebugRanges(); - - // Emit info into a debug macinfo section. - emitDebugMacInfo(); - - // Emit inline info. - // TODO: When we don't need the option anymore we - // can remove all of the code that this section - // depends upon. - if (useDarwinGDBCompat()) - emitDebugInlineInfo(); - - // Emit info into a debug str section. + // Finally emit string information into a string table. emitDebugStr(); + if (useSplitDwarf()) + emitDebugStrDWO(); // clean up. - DeleteContainerSeconds(DeadFnScopeMap); SPMap.clear(); for (DenseMap::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) delete I->second; - FirstCU = NULL; // Reset for the next Module, if any. + + for (SmallVector::iterator I = SkeletonCUs.begin(), + E = SkeletonCUs.end(); I != E; ++I) + delete *I; + + // Reset these for the next Module if we have one. + FirstCU = NULL; } -/// findAbstractVariable - Find abstract variable, if any, associated with Var. +// Find abstract variable, if any, associated with Var. DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, DebugLoc ScopeLoc) { LLVMContext &Ctx = DV->getContext(); @@ -933,8 +1025,7 @@ DbgVariable *DwarfDebug::findAbstractVariable(DIVariable &DV, return AbsDbgVariable; } -/// addCurrentFnArgument - If Var is a current function argument then add -/// it to CurrentFnArguments list. +// If Var is a current function argument then add it to CurrentFnArguments list. 
bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, DbgVariable *Var, LexicalScope *Scope) { if (!LScopes.isCurrentFunctionScope(Scope)) @@ -943,7 +1034,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, if (DV.getTag() != dwarf::DW_TAG_arg_variable) return false; unsigned ArgNo = DV.getArgNumber(); - if (ArgNo == 0) + if (ArgNo == 0) return false; size_t Size = CurrentFnArguments.size(); @@ -957,8 +1048,7 @@ bool DwarfDebug::addCurrentFnArgument(const MachineFunction *MF, return true; } -/// collectVariableInfoFromMMITable - Collect variable information from -/// side table maintained by MMI. +// Collect variable information from side table maintained by MMI. void DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, SmallPtrSet &Processed) { @@ -987,8 +1077,8 @@ DwarfDebug::collectVariableInfoFromMMITable(const MachineFunction *MF, } } -/// isDbgValueInDefinedReg - Return true if debug value, encoded by -/// DBG_VALUE instruction, is in a defined reg. +// Return true if debug value, encoded by DBG_VALUE instruction, is in a +// defined reg. static bool isDbgValueInDefinedReg(const MachineInstr *MI) { assert(MI->isDebugValue() && "Invalid DBG_VALUE machine instruction!"); return MI->getNumOperands() == 3 && @@ -996,10 +1086,9 @@ static bool isDbgValueInDefinedReg(const MachineInstr *MI) { MI->getOperand(1).isImm() && MI->getOperand(1).getImm() == 0; } -/// getDebugLocEntry - Get .debug_loc entry for the instruction range starting -/// at MI. -static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, - const MCSymbol *FLabel, +// Get .debug_loc entry for the instruction range starting at MI. +static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, + const MCSymbol *FLabel, const MCSymbol *SLabel, const MachineInstr *MI) { const MDNode *Var = MI->getOperand(MI->getNumOperands() - 1).getMetadata(); @@ -1023,12 +1112,12 @@ static DotDebugLocEntry getDebugLocEntry(AsmPrinter *Asm, llvm_unreachable("Unexpected 3 operand DBG_VALUE instruction!"); } -/// collectVariableInfo - Find variables for each lexical scope. +// Find variables for each lexical scope. void DwarfDebug::collectVariableInfo(const MachineFunction *MF, SmallPtrSet &Processed) { - /// collection info from MMI table. + // collection info from MMI table. collectVariableInfoFromMMITable(MF, Processed); for (SmallVectorImpl::const_iterator @@ -1050,16 +1139,10 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, if (DV.getTag() == dwarf::DW_TAG_arg_variable && DISubprogram(DV.getContext()).describes(MF->getFunction())) Scope = LScopes.getCurrentFunctionScope(); - else { - if (DV.getVersion() <= LLVMDebugVersion9) - Scope = LScopes.findLexicalScope(MInsn->getDebugLoc()); - else { - if (MDNode *IA = DV.getInlinedAt()) - Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); - else - Scope = LScopes.findLexicalScope(cast(DV->getOperand(1))); - } - } + else if (MDNode *IA = DV.getInlinedAt()) + Scope = LScopes.findInlinedScope(DebugLoc::getFromDILocation(IA)); + else + Scope = LScopes.findLexicalScope(cast(DV->getOperand(1))); // If variable scope is not found then skip this variable. if (!Scope) continue; @@ -1080,7 +1163,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, continue; } - // handle multiple DBG_VALUE instructions describing one variable. + // Handle multiple DBG_VALUE instructions describing one variable. 
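The comment that ends the hunk above introduces the loop, in the next hunk, that turns one variable's DBG_VALUE history into .debug_loc ranges: each DBG_VALUE opens a range at the label before it, and the range is closed by the label before the next DBG_VALUE, the label after a clobbering instruction, or the function end symbol when it is the last entry. A rough standalone model of that rule; machine instructions and MC symbols are reduced to plain structs and strings, and treating clobber entries purely as range terminators is an assumption of the sketch:

    #include <cstddef>
    #include <string>
    #include <vector>

    struct HistoryEntry {
      std::string LabelBefore; // label emitted just before the instruction
      std::string LabelAfter;  // label emitted just after it
      bool IsDebugValue;       // false for a clobbering instruction
    };

    struct LocRange { std::string Start, End; };

    std::vector<LocRange> buildLocList(const std::vector<HistoryEntry> &History,
                                       const std::string &FunctionEnd) {
      std::vector<LocRange> Ranges;
      for (std::size_t i = 0; i < History.size(); ++i) {
        const HistoryEntry &Begin = History[i];
        if (!Begin.IsDebugValue)
          continue;                        // clobbers only end earlier ranges
        std::string End;
        if (i + 1 == History.size())
          End = FunctionEnd;               // last location reaches function end
        else if (History[i + 1].IsDebugValue)
          End = History[i + 1].LabelBefore;
        else
          End = History[i + 1].LabelAfter;
        Ranges.push_back({Begin.LabelBefore, End});
      }
      return Ranges;
    }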
RegVar->setDotDebugLocOffset(DotDebugLocEntries.size()); for (SmallVectorImpl::const_iterator @@ -1103,7 +1186,7 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, SLabel = FunctionEndSym; else { const MachineInstr *End = HI[1]; - DEBUG(dbgs() << "DotDebugLoc Pair:\n" + DEBUG(dbgs() << "DotDebugLoc Pair:\n" << "\t" << *Begin << "\t" << *End << "\n"); if (End->isDebugValue()) SLabel = getLabelBeforeInsn(End); @@ -1134,19 +1217,19 @@ DwarfDebug::collectVariableInfo(const MachineFunction *MF, } } -/// getLabelBeforeInsn - Return Label preceding the instruction. -const MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { +// Return Label preceding the instruction. +MCSymbol *DwarfDebug::getLabelBeforeInsn(const MachineInstr *MI) { MCSymbol *Label = LabelsBeforeInsn.lookup(MI); assert(Label && "Didn't insert label before instruction"); return Label; } -/// getLabelAfterInsn - Return Label immediately following the instruction. -const MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { +// Return Label immediately following the instruction. +MCSymbol *DwarfDebug::getLabelAfterInsn(const MachineInstr *MI) { return LabelsAfterInsn.lookup(MI); } -/// beginInstruction - Process beginning of an instruction. +// Process beginning of an instruction. void DwarfDebug::beginInstruction(const MachineInstr *MI) { // Check if source location changes, but ignore DBG_VALUE locations. if (!MI->isDebugValue()) { @@ -1188,7 +1271,7 @@ void DwarfDebug::beginInstruction(const MachineInstr *MI) { I->second = PrevLabel; } -/// endInstruction - Process end of an instruction. +// Process end of an instruction. void DwarfDebug::endInstruction(const MachineInstr *MI) { // Don't create a new label after DBG_VALUE instructions. // They don't generate code. @@ -1214,11 +1297,10 @@ void DwarfDebug::endInstruction(const MachineInstr *MI) { I->second = PrevLabel; } -/// identifyScopeMarkers() - -/// Each LexicalScope has first instruction and last instruction to mark -/// beginning and end of a scope respectively. Create an inverse map that list -/// scopes starts (and ends) with an instruction. One instruction may start (or -/// end) multiple scopes. Ignore scopes that are not reachable. +// Each LexicalScope has first instruction and last instruction to mark +// beginning and end of a scope respectively. Create an inverse map that list +// scopes starts (and ends) with an instruction. One instruction may start (or +// end) multiple scopes. Ignore scopes that are not reachable. void DwarfDebug::identifyScopeMarkers() { SmallVector WorkList; WorkList.push_back(LScopes.getCurrentFunctionScope()); @@ -1247,15 +1329,15 @@ void DwarfDebug::identifyScopeMarkers() { } } -/// getScopeNode - Get MDNode for DebugLoc's scope. +// Get MDNode for DebugLoc's scope. static MDNode *getScopeNode(DebugLoc DL, const LLVMContext &Ctx) { if (MDNode *InlinedAt = DL.getInlinedAt(Ctx)) return getScopeNode(DebugLoc::getFromDILocation(InlinedAt), Ctx); return DL.getScope(Ctx); } -/// getFnDebugLoc - Walk up the scope chain of given debug loc and find -/// line number info for the function. +// Walk up the scope chain of given debug loc and find line number info +// for the function. static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { const MDNode *Scope = getScopeNode(DL, Ctx); DISubprogram SP = getDISubprogram(Scope); @@ -1271,14 +1353,21 @@ static DebugLoc getFnDebugLoc(DebugLoc DL, const LLVMContext &Ctx) { return DebugLoc(); } -/// beginFunction - Gather pre-function debug information. 
Assumes being -/// emitted immediately after the function entry point. +// Gather pre-function debug information. Assumes being called immediately +// after the function entry point has been emitted. void DwarfDebug::beginFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo()) return; LScopes.initialize(*MF); if (LScopes.empty()) return; identifyScopeMarkers(); + // Set DwarfCompileUnitID in MCContext to the Compile Unit this function + // belongs to. + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); + CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); + assert(TheCU && "Unable to find compile unit!"); + Asm->OutStreamer.getContext().setDwarfCompileUnitID(TheCU->getUniqueID()); + FunctionBeginSym = Asm->GetTempSymbol("func_begin", Asm->getFunctionNumber()); // Assumes in correct section after the entry point. @@ -1287,7 +1376,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { assert(UserVariables.empty() && DbgValues.empty() && "Maps weren't cleaned"); const TargetRegisterInfo *TRI = Asm->TM.getRegisterInfo(); - /// LiveUserVar - Map physreg numbers to the MDNode they contain. + // LiveUserVar - Map physreg numbers to the MDNode they contain. std::vector LiveUserVar(TRI->getNumRegs()); for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); @@ -1327,7 +1416,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { if (History.size() >= 2 && Prev->isIdenticalTo(History[History.size() - 2])) { DEBUG(dbgs() << "Coalescing identical DBG_VALUE entries:\n" - << "\t" << *Prev + << "\t" << *Prev << "\t" << *History[History.size() - 2] << "\n"); History.pop_back(); } @@ -1413,7 +1502,7 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { const MachineInstr *Prev = History.back(); if (Prev->isDebugValue() && isDbgValueInDefinedReg(Prev)) { const MachineBasicBlock *PrevMBB = Prev->getParent(); - MachineBasicBlock::const_iterator LastMI = + MachineBasicBlock::const_iterator LastMI = PrevMBB->getLastNonDebugInstr(); if (LastMI == PrevMBB->end()) // Drop DBG_VALUE for empty range. @@ -1442,7 +1531,9 @@ void DwarfDebug::beginFunction(const MachineFunction *MF) { MF->getFunction()->getContext()); recordSourceLine(FnStartDL.getLine(), FnStartDL.getCol(), FnStartDL.getScope(MF->getFunction()->getContext()), - 0); + // We'd like to list the prologue as "not statements" but GDB behaves + // poorly if we do that. Revisit this with caution/GDB (7.5+) testing. + DWARF2_FLAG_IS_STMT); } } @@ -1452,8 +1543,7 @@ void DwarfDebug::addScopeVariable(LexicalScope *LS, DbgVariable *Var) { // Vars.push_back(Var); } -/// endFunction - Gather and emit post-function debug information. -/// +// Gather and emit post-function debug information. void DwarfDebug::endFunction(const MachineFunction *MF) { if (!MMI->hasDebugInfo() || LScopes.empty()) return; @@ -1462,10 +1552,12 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { Asm->getFunctionNumber()); // Assumes in correct section after the entry point. Asm->OutStreamer.EmitLabel(FunctionEndSym); - + // Set DwarfCompileUnitID in MCContext to default value. 
+ Asm->OutStreamer.getContext().setDwarfCompileUnitID(0); + SmallPtrSet ProcessedVars; collectVariableInfo(MF, ProcessedVars); - + LexicalScope *FnScope = LScopes.getCurrentFunctionScope(); CompileUnit *TheCU = SPMap.lookup(FnScope->getScopeNode()); assert(TheCU && "Unable to find compile unit!"); @@ -1495,9 +1587,9 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { if (ProcessedSPNodes.count(AScope->getScopeNode()) == 0) constructScopeDIE(TheCU, AScope); } - + DIE *CurFnDIE = constructScopeDIE(TheCU, FnScope); - + if (!MF->getTarget().Options.DisableFramePointerElim(*MF)) TheCU->addFlag(CurFnDIE, dwarf::DW_AT_APPLE_omit_frame_ptr); @@ -1518,9 +1610,8 @@ void DwarfDebug::endFunction(const MachineFunction *MF) { PrevLabel = NULL; } -/// recordSourceLine - Register a source line with debug info. Returns the -/// unique label that was emitted and which provides correspondence to -/// the source line list. +// Register a source line with debug info. Returns the unique label that was +// emitted and which provides correspondence to the source line list. void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, unsigned Flags) { StringRef Fn; @@ -1552,7 +1643,8 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, } else llvm_unreachable("Unexpected scope info"); - Src = GetOrCreateSourceID(Fn, Dir); + Src = getOrCreateSourceID(Fn, Dir, + Asm->OutStreamer.getContext().getDwarfCompileUnitID()); } Asm->OutStreamer.EmitDwarfLocDirective(Src, Line, Col, Flags, 0, 0, Fn); } @@ -1561,10 +1653,9 @@ void DwarfDebug::recordSourceLine(unsigned Line, unsigned Col, const MDNode *S, // Emit Methods //===----------------------------------------------------------------------===// -/// computeSizeAndOffset - Compute the size and offset of a DIE. -/// +// Compute the size and offset of a DIE. unsigned -DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { +DwarfUnits::computeSizeAndOffset(DIE *Die, unsigned Offset) { // Get the children. const std::vector &Children = Die->getChildren(); @@ -1573,7 +1664,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { // Get the abbreviation for this DIE. unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; + const DIEAbbrev *Abbrev = Abbreviations->at(AbbrevNumber - 1); // Set DIE offset Die->setOffset(Offset); @@ -1581,8 +1672,8 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { // Start the size with the size of abbreviation code. Offset += MCAsmInfo::getULEB128Size(AbbrevNumber); - const SmallVector &Values = Die->getValues(); - const SmallVector &AbbrevData = Abbrev->getData(); + const SmallVectorImpl &Values = Die->getValues(); + const SmallVectorImpl &AbbrevData = Abbrev->getData(); // Size the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) @@ -1595,7 +1686,7 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { "Children flag not set"); for (unsigned j = 0, M = Children.size(); j < M; ++j) - Offset = computeSizeAndOffset(Children[j], Offset, (j + 1) == M); + Offset = computeSizeAndOffset(Children[j], Offset); // End of children marker. Offset += sizeof(int8_t); @@ -1605,57 +1696,68 @@ DwarfDebug::computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last) { return Offset; } -/// computeSizeAndOffsets - Compute the size and offset of all the DIEs. 
-/// -void DwarfDebug::computeSizeAndOffsets() { - for (DenseMap::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - // Compute size of compile unit header. - unsigned Offset = +// Compute the size and offset of all the DIEs. +void DwarfUnits::computeSizeAndOffsets() { + // Offset from the beginning of debug info section. + unsigned AccuOffset = 0; + for (SmallVectorImpl::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + (*I)->setDebugInfoOffset(AccuOffset); + unsigned Offset = sizeof(int32_t) + // Length of Compilation Unit Info sizeof(int16_t) + // DWARF version number sizeof(int32_t) + // Offset Into Abbrev. Section sizeof(int8_t); // Pointer Size (in bytes) - computeSizeAndOffset(I->second->getCUDie(), Offset, true); + + unsigned EndOffset = computeSizeAndOffset((*I)->getCUDie(), Offset); + AccuOffset += EndOffset; } } -/// EmitSectionLabels - Emit initial Dwarf sections with a label at -/// the start of each one. -void DwarfDebug::EmitSectionLabels() { +// Emit initial Dwarf sections with a label at the start of each one. +void DwarfDebug::emitSectionLabels() { const TargetLoweringObjectFile &TLOF = Asm->getObjFileLowering(); // Dwarf sections base addresses. DwarfInfoSectionSym = - EmitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); + emitSectionSym(Asm, TLOF.getDwarfInfoSection(), "section_info"); DwarfAbbrevSectionSym = - EmitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); - EmitSectionSym(Asm, TLOF.getDwarfARangesSection()); + emitSectionSym(Asm, TLOF.getDwarfAbbrevSection(), "section_abbrev"); + if (useSplitDwarf()) + DwarfAbbrevDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfAbbrevDWOSection(), + "section_abbrev_dwo"); + emitSectionSym(Asm, TLOF.getDwarfARangesSection()); if (const MCSection *MacroInfo = TLOF.getDwarfMacroInfoSection()) - EmitSectionSym(Asm, MacroInfo); - - EmitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); - EmitSectionSym(Asm, TLOF.getDwarfLocSection()); - EmitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); + emitSectionSym(Asm, MacroInfo); + + DwarfLineSectionSym = + emitSectionSym(Asm, TLOF.getDwarfLineSection(), "section_line"); + emitSectionSym(Asm, TLOF.getDwarfLocSection()); + if (GenerateDwarfPubNamesSection) + emitSectionSym(Asm, TLOF.getDwarfPubNamesSection()); + emitSectionSym(Asm, TLOF.getDwarfPubTypesSection()); DwarfStrSectionSym = - EmitSectionSym(Asm, TLOF.getDwarfStrSection(), "section_str"); - DwarfDebugRangeSectionSym = EmitSectionSym(Asm, TLOF.getDwarfRangesSection(), + emitSectionSym(Asm, TLOF.getDwarfStrSection(), "info_string"); + if (useSplitDwarf()) + DwarfStrDWOSectionSym = + emitSectionSym(Asm, TLOF.getDwarfStrDWOSection(), "skel_string"); + DwarfDebugRangeSectionSym = emitSectionSym(Asm, TLOF.getDwarfRangesSection(), "debug_range"); - DwarfDebugLocSectionSym = EmitSectionSym(Asm, TLOF.getDwarfLocSection(), + DwarfDebugLocSectionSym = emitSectionSym(Asm, TLOF.getDwarfLocSection(), "section_debug_loc"); - TextSectionSym = EmitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); - EmitSectionSym(Asm, TLOF.getDataSection()); + TextSectionSym = emitSectionSym(Asm, TLOF.getTextSection(), "text_begin"); + emitSectionSym(Asm, TLOF.getDataSection()); } -/// emitDIE - Recursively emits a debug information entry. -/// -void DwarfDebug::emitDIE(DIE *Die) { +// Recursively emits a debug information entry. +void DwarfDebug::emitDIE(DIE *Die, std::vector *Abbrevs) { // Get the abbreviation for this DIE. 
unsigned AbbrevNumber = Die->getAbbrevNumber(); - const DIEAbbrev *Abbrev = Abbreviations[AbbrevNumber - 1]; + const DIEAbbrev *Abbrev = Abbrevs->at(AbbrevNumber - 1); // Emit the code (index) for the abbreviation. if (Asm->isVerbose()) @@ -1665,8 +1767,8 @@ void DwarfDebug::emitDIE(DIE *Die) { dwarf::TagString(Abbrev->getTag())); Asm->EmitULEB128(AbbrevNumber); - const SmallVector &Values = Die->getValues(); - const SmallVector &AbbrevData = Abbrev->getData(); + const SmallVectorImpl &Values = Die->getValues(); + const SmallVectorImpl &AbbrevData = Abbrev->getData(); // Emit the DIE attribute values. for (unsigned i = 0, N = Values.size(); i < N; ++i) { @@ -1682,6 +1784,13 @@ void DwarfDebug::emitDIE(DIE *Die) { DIEEntry *E = cast(Values[i]); DIE *Origin = E->getEntry(); unsigned Addr = Origin->getOffset(); + if (Form == dwarf::DW_FORM_ref_addr) { + // For DW_FORM_ref_addr, output the offset from beginning of debug info + // section. Origin->getOffset() returns the offset from start of the + // compile unit. + DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Addr += Holder.getCUOffset(Origin->getCompileUnit()); + } Asm->EmitInt32(Addr); break; } @@ -1732,7 +1841,7 @@ void DwarfDebug::emitDIE(DIE *Die) { const std::vector &Children = Die->getChildren(); for (unsigned j = 0, M = Children.size(); j < M; ++j) - emitDIE(Children[j]); + emitDIE(Children[j], Abbrevs); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("End Of Children Mark"); @@ -1740,20 +1849,22 @@ void DwarfDebug::emitDIE(DIE *Die) { } } -/// emitDebugInfo - Emit the debug info section. -/// -void DwarfDebug::emitDebugInfo() { - // Start debug info section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfInfoSection()); - for (DenseMap::iterator I = CUMap.begin(), - E = CUMap.end(); I != E; ++I) { - CompileUnit *TheCU = I->second; +// Emit the various dwarf units to the unit section USection with +// the abbreviations going into ASection. +void DwarfUnits::emitUnits(DwarfDebug *DD, + const MCSection *USection, + const MCSection *ASection, + const MCSymbol *ASectionSym) { + Asm->OutStreamer.SwitchSection(USection); + for (SmallVectorImpl::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + CompileUnit *TheCU = *I; DIE *Die = TheCU->getCUDie(); // Emit the compile units header. - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_begin", - TheCU->getID())); + Asm->OutStreamer + .EmitLabel(Asm->GetTempSymbol(USection->getLabelBeginName(), + TheCU->getUniqueID())); // Emit size of content not including length itself unsigned ContentSize = Die->getSize() + @@ -1766,31 +1877,62 @@ void DwarfDebug::emitDebugInfo() { Asm->OutStreamer.AddComment("DWARF version number"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Offset Into Abbrev. Section"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("abbrev_begin"), - DwarfAbbrevSectionSym); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ASection->getLabelBeginName()), + ASectionSym); Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - emitDIE(Die); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("info_end", TheCU->getID())); + DD->emitDIE(Die, Abbreviations); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol(USection->getLabelEndName(), + TheCU->getUniqueID())); } } -/// emitAbbreviations - Emit the abbreviation section. -/// -void DwarfDebug::emitAbbreviations() const { +/// For a given compile unit DIE, returns offset from beginning of debug info. 
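
The DW_FORM_ref_addr handling above rebases a DIE offset from "relative to its compile unit" onto "relative to the start of .debug_info" by adding the owning unit's section offset (getCUOffset). A minimal standalone sketch of that arithmetic, using hypothetical stand-in types rather than LLVM's DIE/CompileUnit classes:

#include <cassert>
#include <cstdint>

struct UnitStub { uint64_t DebugInfoOffset; };        // where the CU starts in .debug_info
struct DieStub  { UnitStub *Unit; uint64_t Offset; }; // offset within the owning CU

// DW_FORM_ref_addr values are section-relative, so the owning unit's
// offset must be folded in before the 4-byte value is emitted.
static uint64_t refAddrValue(const DieStub &D) {
  return D.Unit->DebugInfoOffset + D.Offset;
}

int main() {
  UnitStub CU0{0}, CU1{0x120};
  DieStub A{&CU0, 0x2c}, B{&CU1, 0x40};
  assert(refAddrValue(A) == 0x2c);   // first CU: offset unchanged
  assert(refAddrValue(B) == 0x160);  // second CU: 0x120 + 0x40
  return 0;
}
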
+unsigned DwarfUnits::getCUOffset(DIE *Die) { + assert(Die->getTag() == dwarf::DW_TAG_compile_unit && + "Input DIE should be compile unit in getCUOffset."); + for (SmallVectorImpl::iterator I = CUs.begin(), + E = CUs.end(); I != E; ++I) { + CompileUnit *TheCU = *I; + if (TheCU->getCUDie() == Die) + return TheCU->getDebugInfoOffset(); + } + llvm_unreachable("The compile unit DIE should belong to CUs in DwarfUnits."); +} + +// Emit the debug info section. +void DwarfDebug::emitDebugInfo() { + DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + + Holder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoSection(), + Asm->getObjFileLowering().getDwarfAbbrevSection(), + DwarfAbbrevSectionSym); +} + +// Emit the abbreviation section. +void DwarfDebug::emitAbbreviations() { + if (!useSplitDwarf()) + emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection(), + &Abbreviations); + else + emitSkeletonAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevSection()); +} + +void DwarfDebug::emitAbbrevs(const MCSection *Section, + std::vector *Abbrevs) { // Check to see if it is worth the effort. - if (!Abbreviations.empty()) { + if (!Abbrevs->empty()) { // Start the debug abbrev section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfAbbrevSection()); + Asm->OutStreamer.SwitchSection(Section); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_begin")); + MCSymbol *Begin = Asm->GetTempSymbol(Section->getLabelBeginName()); + Asm->OutStreamer.EmitLabel(Begin); // For each abbrevation. - for (unsigned i = 0, N = Abbreviations.size(); i < N; ++i) { + for (unsigned i = 0, N = Abbrevs->size(); i < N; ++i) { // Get abbreviation data - const DIEAbbrev *Abbrev = Abbreviations[i]; + const DIEAbbrev *Abbrev = Abbrevs->at(i); // Emit the abbrevations code (base 1 index.) Asm->EmitULEB128(Abbrev->getNumber(), "Abbreviation Code"); @@ -1802,13 +1944,12 @@ void DwarfDebug::emitAbbreviations() const { // Mark end of abbreviations. Asm->EmitULEB128(0, "EOM(3)"); - Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("abbrev_end")); + MCSymbol *End = Asm->GetTempSymbol(Section->getLabelEndName()); + Asm->OutStreamer.EmitLabel(End); } } -/// emitEndOfLineMatrix - Emit the last address of the section and the end of -/// the line matrix. -/// +// Emit the last address of the section and the end of the line matrix. void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { // Define last address of section. Asm->OutStreamer.AddComment("Extended Op"); @@ -1822,8 +1963,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->OutStreamer.AddComment("Section end label"); Asm->OutStreamer.EmitSymbolValue(Asm->GetTempSymbol("section_end",SectionEnd), - Asm->getDataLayout().getPointerSize(), - 0/*AddrSpace*/); + Asm->getDataLayout().getPointerSize()); // Mark end of matrix. Asm->OutStreamer.AddComment("DW_LNE_end_sequence"); @@ -1832,8 +1972,7 @@ void DwarfDebug::emitEndOfLineMatrix(unsigned SectionEnd) { Asm->EmitInt8(1); } -/// emitAccelNames - Emit visible names into a hashed accelerator table -/// section. +// Emit visible names into a hashed accelerator table section. void DwarfDebug::emitAccelNames() { DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, dwarf::DW_FORM_data4)); @@ -1858,11 +1997,11 @@ void DwarfDebug::emitAccelNames() { Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. 
- AT.Emit(Asm, SectionBegin, this); + AT.Emit(Asm, SectionBegin, &InfoHolder); } -/// emitAccelObjC - Emit objective C classes and categories into a hashed -/// accelerator table section. +// Emit objective C classes and categories into a hashed accelerator table +// section. void DwarfDebug::emitAccelObjC() { DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, dwarf::DW_FORM_data4)); @@ -1887,11 +2026,10 @@ void DwarfDebug::emitAccelObjC() { Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, this); + AT.Emit(Asm, SectionBegin, &InfoHolder); } -/// emitAccelNamespace - Emit namespace dies into a hashed accelerator -/// table. +// Emit namespace dies into a hashed accelerator table. void DwarfDebug::emitAccelNamespaces() { DwarfAccelTable AT(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, dwarf::DW_FORM_data4)); @@ -1916,10 +2054,10 @@ void DwarfDebug::emitAccelNamespaces() { Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, this); + AT.Emit(Asm, SectionBegin, &InfoHolder); } -/// emitAccelTypes() - Emit type dies into a hashed accelerator table. +// Emit type dies into a hashed accelerator table. void DwarfDebug::emitAccelTypes() { std::vector Atoms; Atoms.push_back(DwarfAccelTable::Atom(DwarfAccelTable::eAtomTypeDIEOffset, @@ -1951,7 +2089,62 @@ void DwarfDebug::emitAccelTypes() { Asm->OutStreamer.EmitLabel(SectionBegin); // Emit the full data. - AT.Emit(Asm, SectionBegin, this); + AT.Emit(Asm, SectionBegin, &InfoHolder); +} + +/// emitDebugPubnames - Emit visible names into a debug pubnames section. +/// +void DwarfDebug::emitDebugPubnames() { + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + + typedef DenseMap CUMapType; + for (CUMapType::iterator I = CUMap.begin(), E = CUMap.end(); I != E; ++I) { + CompileUnit *TheCU = I->second; + unsigned ID = TheCU->getUniqueID(); + + if (TheCU->getGlobalNames().empty()) + continue; + + // Start the dwarf pubnames section. 
+ Asm->OutStreamer.SwitchSection( + Asm->getObjFileLowering().getDwarfPubNamesSection()); + + Asm->OutStreamer.AddComment("Length of Public Names Info"); + Asm->EmitLabelDifference(Asm->GetTempSymbol("pubnames_end", ID), + Asm->GetTempSymbol("pubnames_begin", ID), 4); + + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_begin", ID)); + + Asm->OutStreamer.AddComment("DWARF Version"); + Asm->EmitInt16(dwarf::DWARF_VERSION); + + Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), + DwarfInfoSectionSym); + + Asm->OutStreamer.AddComment("Compilation Unit Length"); + Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), ID), + Asm->GetTempSymbol(ISec->getLabelBeginName(), ID), + 4); + + const StringMap &Globals = TheCU->getGlobalNames(); + for (StringMap::const_iterator + GI = Globals.begin(), GE = Globals.end(); GI != GE; ++GI) { + const char *Name = GI->getKeyData(); + const DIE *Entity = GI->second; + + Asm->OutStreamer.AddComment("DIE offset"); + Asm->EmitInt32(Entity->getOffset()); + + if (Asm->isVerbose()) + Asm->OutStreamer.AddComment("External Name"); + Asm->OutStreamer.EmitBytes(StringRef(Name, strlen(Name)+1), 0); + } + + Asm->OutStreamer.AddComment("End Mark"); + Asm->EmitInt32(0); + Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubnames_end", ID)); + } } void DwarfDebug::emitDebugPubTypes() { @@ -1963,22 +2156,26 @@ void DwarfDebug::emitDebugPubTypes() { Asm->getObjFileLowering().getDwarfPubTypesSection()); Asm->OutStreamer.AddComment("Length of Public Types Info"); Asm->EmitLabelDifference( - Asm->GetTempSymbol("pubtypes_end", TheCU->getID()), - Asm->GetTempSymbol("pubtypes_begin", TheCU->getID()), 4); + Asm->GetTempSymbol("pubtypes_end", TheCU->getUniqueID()), + Asm->GetTempSymbol("pubtypes_begin", TheCU->getUniqueID()), 4); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_begin", - TheCU->getID())); + TheCU->getUniqueID())); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DWARF Version"); Asm->EmitInt16(dwarf::DWARF_VERSION); Asm->OutStreamer.AddComment("Offset of Compilation Unit Info"); - Asm->EmitSectionOffset(Asm->GetTempSymbol("info_begin", TheCU->getID()), + const MCSection *ISec = Asm->getObjFileLowering().getDwarfInfoSection(); + Asm->EmitSectionOffset(Asm->GetTempSymbol(ISec->getLabelBeginName(), + TheCU->getUniqueID()), DwarfInfoSectionSym); Asm->OutStreamer.AddComment("Compilation Unit Length"); - Asm->EmitLabelDifference(Asm->GetTempSymbol("info_end", TheCU->getID()), - Asm->GetTempSymbol("info_begin", TheCU->getID()), + Asm->EmitLabelDifference(Asm->GetTempSymbol(ISec->getLabelEndName(), + TheCU->getUniqueID()), + Asm->GetTempSymbol(ISec->getLabelBeginName(), + TheCU->getUniqueID()), 4); const StringMap &Globals = TheCU->getGlobalTypes(); @@ -1992,33 +2189,34 @@ void DwarfDebug::emitDebugPubTypes() { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("External Name"); // Emit the name with a terminating null byte. - Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1), 0); + Asm->OutStreamer.EmitBytes(StringRef(Name, GI->getKeyLength()+1)); } Asm->OutStreamer.AddComment("End Mark"); Asm->EmitInt32(0); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("pubtypes_end", - TheCU->getID())); + TheCU->getUniqueID())); } } -/// emitDebugStr - Emit visible names into a debug str section. -/// -void DwarfDebug::emitDebugStr() { - // Check to see if it is worth the effort. +// Emit strings into a string section. 
+void DwarfUnits::emitStrings(const MCSection *StrSection, + const MCSection *OffsetSection = NULL, + const MCSymbol *StrSecSym = NULL) { + if (StringPool.empty()) return; // Start the dwarf str section. - Asm->OutStreamer.SwitchSection( - Asm->getObjFileLowering().getDwarfStrSection()); + Asm->OutStreamer.SwitchSection(StrSection); // Get all of the string pool entries and put them in an array by their ID so // we can sort them. SmallVector >*>, 64> Entries; + StringMapEntry >*>, 64> Entries; for (StringMap >::iterator - I = StringPool.begin(), E = StringPool.end(); I != E; ++I) + I = StringPool.begin(), E = StringPool.end(); + I != E; ++I) Entries.push_back(std::make_pair(I->second.second, &*I)); array_pod_sort(Entries.begin(), Entries.end()); @@ -2029,18 +2227,65 @@ void DwarfDebug::emitDebugStr() { // Emit the string itself with a terminating null byte. Asm->OutStreamer.EmitBytes(StringRef(Entries[i].second->getKeyData(), - Entries[i].second->getKeyLength()+1), - 0/*addrspace*/); + Entries[i].second->getKeyLength()+1)); + } + + // If we've got an offset section go ahead and emit that now as well. + if (OffsetSection) { + Asm->OutStreamer.SwitchSection(OffsetSection); + unsigned offset = 0; + unsigned size = 4; // FIXME: DWARF64 is 8. + for (unsigned i = 0, e = Entries.size(); i != e; ++i) { + Asm->OutStreamer.EmitIntValue(offset, size); + offset += Entries[i].second->getKeyLength() + 1; + } } } -/// emitDebugLoc - Emit visible names into a debug loc section. -/// +// Emit strings into a string section. +void DwarfUnits::emitAddresses(const MCSection *AddrSection) { + + if (AddressPool.empty()) return; + + // Start the dwarf addr section. + Asm->OutStreamer.SwitchSection(AddrSection); + + // Get all of the string pool entries and put them in an array by their ID so + // we can sort them. + SmallVector* >, 64> Entries; + + for (DenseMap >::iterator + I = AddressPool.begin(), E = AddressPool.end(); + I != E; ++I) + Entries.push_back(std::make_pair(I->second.second, &(I->second))); + + array_pod_sort(Entries.begin(), Entries.end()); + + for (unsigned i = 0, e = Entries.size(); i != e; ++i) { + // Emit a label for reference from debug information entries. + MCSymbol *Sym = Entries[i].second->first; + if (Sym) + Asm->EmitLabelReference(Entries[i].second->first, + Asm->getDataLayout().getPointerSize()); + else + Asm->OutStreamer.EmitIntValue(0, Asm->getDataLayout().getPointerSize()); + } + +} + +// Emit visible names into a debug str section. +void DwarfDebug::emitDebugStr() { + DwarfUnits &Holder = useSplitDwarf() ? SkeletonHolder : InfoHolder; + Holder.emitStrings(Asm->getObjFileLowering().getDwarfStrSection()); +} + +// Emit visible names into a debug loc section. 
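
The offsets loop above writes one fixed-size entry per pooled string, equal to the running byte position of that string in the string section (length plus terminating NUL). A minimal standalone sketch of that bookkeeping, with hypothetical containers standing in for the MC streamer:

#include <cstdint>
#include <iostream>
#include <string>
#include <vector>

int main() {
  std::vector<std::string> Strings = {"main", "int", "argv"};
  std::string Blob;              // stand-in for the emitted string section
  std::vector<uint32_t> Offsets; // stand-in for the 4-byte offset entries

  uint32_t Offset = 0;
  for (const std::string &S : Strings) {
    Offsets.push_back(Offset);   // entry points at the string's first byte
    Blob += S;
    Blob += '\0';                // strings are NUL-terminated in the section
    Offset += static_cast<uint32_t>(S.size()) + 1; // advance past string + NUL
  }

  for (size_t I = 0; I != Strings.size(); ++I)
    std::cout << Strings[I] << " -> " << Offsets[I] << "\n";
  // Prints: main -> 0, int -> 5, argv -> 9
  return 0;
}
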
void DwarfDebug::emitDebugLoc() { if (DotDebugLocEntries.empty()) return; - for (SmallVector::iterator + for (SmallVectorImpl::iterator I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); I != E; ++I) { DotDebugLocEntry &Entry = *I; @@ -2054,18 +2299,18 @@ void DwarfDebug::emitDebugLoc() { unsigned char Size = Asm->getDataLayout().getPointerSize(); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", 0)); unsigned index = 1; - for (SmallVector::iterator + for (SmallVectorImpl::iterator I = DotDebugLocEntries.begin(), E = DotDebugLocEntries.end(); I != E; ++I, ++index) { DotDebugLocEntry &Entry = *I; if (Entry.isMerged()) continue; if (Entry.isEmpty()) { - Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); - Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitIntValue(0, Size); + Asm->OutStreamer.EmitIntValue(0, Size); Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_loc", index)); } else { - Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size, 0); - Asm->OutStreamer.EmitSymbolValue(Entry.End, Size, 0); + Asm->OutStreamer.EmitSymbolValue(Entry.Begin, Size); + Asm->OutStreamer.EmitSymbolValue(Entry.End, Size); DIVariable DV(Entry.Variable); Asm->OutStreamer.AddComment("Loc expr size"); MCSymbol *begin = Asm->OutStreamer.getContext().CreateTempSymbol(); @@ -2075,7 +2320,7 @@ void DwarfDebug::emitDebugLoc() { if (Entry.isInt()) { DIBasicType BTy(DV.getType()); if (BTy.Verify() && - (BTy.getEncoding() == dwarf::DW_ATE_signed + (BTy.getEncoding() == dwarf::DW_ATE_signed || BTy.getEncoding() == dwarf::DW_ATE_signed_char)) { Asm->OutStreamer.AddComment("DW_OP_consts"); Asm->EmitInt8(dwarf::DW_OP_consts); @@ -2086,7 +2331,7 @@ void DwarfDebug::emitDebugLoc() { Asm->EmitULEB128(Entry.getInt()); } } else if (Entry.isLocation()) { - if (!DV.hasComplexAddress()) + if (!DV.hasComplexAddress()) // Regular entry. Asm->EmitDwarfRegOp(Entry.Loc); else { @@ -2112,7 +2357,7 @@ void DwarfDebug::emitDebugLoc() { } else { Asm->EmitDwarfRegOp(Entry.Loc); } - + // Emit remaining complex address elements. for (; i < N; ++i) { uint64_t Element = DV.getAddrElement(i); @@ -2134,33 +2379,30 @@ void DwarfDebug::emitDebugLoc() { } } -/// EmitDebugARanges - Emit visible names into a debug aranges section. -/// -void DwarfDebug::EmitDebugARanges() { +// Emit visible names into a debug aranges section. +void DwarfDebug::emitDebugARanges() { // Start the dwarf aranges section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfARangesSection()); } -/// emitDebugRanges - Emit visible names into a debug ranges section. -/// +// Emit visible names into a debug ranges section. void DwarfDebug::emitDebugRanges() { // Start the dwarf ranges section. Asm->OutStreamer.SwitchSection( Asm->getObjFileLowering().getDwarfRangesSection()); unsigned char Size = Asm->getDataLayout().getPointerSize(); - for (SmallVector::iterator + for (SmallVectorImpl::iterator I = DebugRangeSymbols.begin(), E = DebugRangeSymbols.end(); I != E; ++I) { if (*I) - Asm->OutStreamer.EmitSymbolValue(const_cast(*I), Size, 0); + Asm->OutStreamer.EmitSymbolValue(const_cast(*I), Size); else - Asm->OutStreamer.EmitIntValue(0, Size, /*addrspace*/0); + Asm->OutStreamer.EmitIntValue(0, Size); } } -/// emitDebugMacInfo - Emit visible names into a debug macinfo section. -/// +// Emit visible names into a debug macinfo section. 
void DwarfDebug::emitDebugMacInfo() { if (const MCSection *LineInfo = Asm->getObjFileLowering().getDwarfMacroInfoSection()) { @@ -2169,24 +2411,24 @@ void DwarfDebug::emitDebugMacInfo() { } } -/// emitDebugInlineInfo - Emit inline info using following format. -/// Section Header: -/// 1. length of section -/// 2. Dwarf version number -/// 3. address size. -/// -/// Entries (one "entry" for each function that was inlined): -/// -/// 1. offset into __debug_str section for MIPS linkage name, if exists; -/// otherwise offset into __debug_str for regular function name. -/// 2. offset into __debug_str section for regular function name. -/// 3. an unsigned LEB128 number indicating the number of distinct inlining -/// instances for the function. -/// -/// The rest of the entry consists of a {die_offset, low_pc} pair for each -/// inlined instance; the die_offset points to the inlined_subroutine die in the -/// __debug_info section, and the low_pc is the starting address for the -/// inlining instance. +// Emit inline info using following format. +// Section Header: +// 1. length of section +// 2. Dwarf version number +// 3. address size. +// +// Entries (one "entry" for each function that was inlined): +// +// 1. offset into __debug_str section for MIPS linkage name, if exists; +// otherwise offset into __debug_str for regular function name. +// 2. offset into __debug_str section for regular function name. +// 3. an unsigned LEB128 number indicating the number of distinct inlining +// instances for the function. +// +// The rest of the entry consists of a {die_offset, low_pc} pair for each +// inlined instance; the die_offset points to the inlined_subroutine die in the +// __debug_info section, and the low_pc is the starting address for the +// inlining instance. 
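
A standalone sketch of the per-function record shape described in the comment above, using hypothetical plain structs; the real emitter streams these fields directly rather than materializing them:

#include <cstdint>
#include <cstdio>
#include <vector>

struct InlinedInstance {
  uint32_t DieOffset; // inlined_subroutine DIE offset in __debug_info
  uint64_t LowPC;     // starting address of this inlined copy
};

struct InlinedEntry {
  uint32_t LinkageNameStrOffset; // __debug_str offset (plain name if none)
  uint32_t NameStrOffset;        // __debug_str offset of the function name
  std::vector<InlinedInstance> Instances; // count is emitted as a ULEB128
};

int main() {
  InlinedEntry E{0x10, 0x24, {{0x80, 0x400500}, {0xa0, 0x400620}}};
  std::printf("%zu inlined instances\n", E.Instances.size());
  return 0;
}
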
void DwarfDebug::emitDebugInlineInfo() { if (!Asm->MAI->doesDwarfUseInlineInfoSection()) return; @@ -2208,38 +2450,121 @@ void DwarfDebug::emitDebugInlineInfo() { Asm->OutStreamer.AddComment("Address Size (in bytes)"); Asm->EmitInt8(Asm->getDataLayout().getPointerSize()); - for (SmallVector::iterator I = InlinedSPNodes.begin(), + for (SmallVectorImpl::iterator I = InlinedSPNodes.begin(), E = InlinedSPNodes.end(); I != E; ++I) { const MDNode *Node = *I; DenseMap >::iterator II = InlineInfo.find(Node); - SmallVector &Labels = II->second; + SmallVectorImpl &Labels = II->second; DISubprogram SP(Node); StringRef LName = SP.getLinkageName(); StringRef Name = SP.getName(); Asm->OutStreamer.AddComment("MIPS linkage name"); if (LName.empty()) - Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym); + Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), + DwarfStrSectionSym); else - Asm->EmitSectionOffset(getStringPoolEntry(getRealLinkageName(LName)), + Asm->EmitSectionOffset(InfoHolder + .getStringPoolEntry(getRealLinkageName(LName)), DwarfStrSectionSym); Asm->OutStreamer.AddComment("Function name"); - Asm->EmitSectionOffset(getStringPoolEntry(Name), DwarfStrSectionSym); + Asm->EmitSectionOffset(InfoHolder.getStringPoolEntry(Name), + DwarfStrSectionSym); Asm->EmitULEB128(Labels.size(), "Inline count"); - for (SmallVector::iterator LI = Labels.begin(), + for (SmallVectorImpl::iterator LI = Labels.begin(), LE = Labels.end(); LI != LE; ++LI) { if (Asm->isVerbose()) Asm->OutStreamer.AddComment("DIE offset"); Asm->EmitInt32(LI->second->getOffset()); if (Asm->isVerbose()) Asm->OutStreamer.AddComment("low_pc"); Asm->OutStreamer.EmitSymbolValue(LI->first, - Asm->getDataLayout().getPointerSize(),0); + Asm->getDataLayout().getPointerSize()); } } Asm->OutStreamer.EmitLabel(Asm->GetTempSymbol("debug_inlined_end", 1)); } + +// DWARF5 Experimental Separate Dwarf emitters. + +// This DIE has the following attributes: DW_AT_comp_dir, DW_AT_stmt_list, +// DW_AT_low_pc, DW_AT_high_pc, DW_AT_ranges, DW_AT_dwo_name, DW_AT_dwo_id, +// DW_AT_ranges_base, DW_AT_addr_base. If DW_AT_ranges is present, +// DW_AT_low_pc and DW_AT_high_pc are not used, and vice versa. +CompileUnit *DwarfDebug::constructSkeletonCU(const MDNode *N) { + DICompileUnit DIUnit(N); + CompilationDir = DIUnit.getDirectory(); + + DIE *Die = new DIE(dwarf::DW_TAG_compile_unit); + CompileUnit *NewCU = new CompileUnit(GlobalCUIndexCount++, + DIUnit.getLanguage(), Die, Asm, + this, &SkeletonHolder); + + NewCU->addLocalString(Die, dwarf::DW_AT_GNU_dwo_name, + DIUnit.getSplitDebugFilename()); + + // This should be a unique identifier when we want to build .dwp files. + NewCU->addUInt(Die, dwarf::DW_AT_GNU_dwo_id, dwarf::DW_FORM_data8, 0); + + // FIXME: The addr base should be relative for each compile unit, however, + // this one is going to be 0 anyhow. + NewCU->addUInt(Die, dwarf::DW_AT_GNU_addr_base, dwarf::DW_FORM_sec_offset, 0); + + // 2.17.1 requires that we use DW_AT_low_pc for a single entry point + // into an entity. We're using 0, or a NULL label for this. + NewCU->addUInt(Die, dwarf::DW_AT_low_pc, dwarf::DW_FORM_addr, 0); + + // DW_AT_stmt_list is a offset of line number information for this + // compile unit in debug_line section. 
+ if (Asm->MAI->doesDwarfUseRelocationsAcrossSections()) + NewCU->addLabel(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, + DwarfLineSectionSym); + else + NewCU->addUInt(Die, dwarf::DW_AT_stmt_list, dwarf::DW_FORM_sec_offset, 0); + + if (!CompilationDir.empty()) + NewCU->addLocalString(Die, dwarf::DW_AT_comp_dir, CompilationDir); + + SkeletonHolder.addUnit(NewCU); + SkeletonCUs.push_back(NewCU); + + return NewCU; +} + +void DwarfDebug::emitSkeletonAbbrevs(const MCSection *Section) { + assert(useSplitDwarf() && "No split dwarf debug info?"); + emitAbbrevs(Section, &SkeletonAbbrevs); +} + +// Emit the .debug_info.dwo section for separated dwarf. This contains the +// compile units that would normally be in debug_info. +void DwarfDebug::emitDebugInfoDWO() { + assert(useSplitDwarf() && "No split dwarf debug info?"); + InfoHolder.emitUnits(this, Asm->getObjFileLowering().getDwarfInfoDWOSection(), + Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), + DwarfAbbrevDWOSectionSym); +} + +// Emit the .debug_abbrev.dwo section for separated dwarf. This contains the +// abbreviations for the .debug_info.dwo section. +void DwarfDebug::emitDebugAbbrevDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + emitAbbrevs(Asm->getObjFileLowering().getDwarfAbbrevDWOSection(), + &Abbreviations); +} + +// Emit the .debug_str.dwo section for separated dwarf. This contains the +// string section and is identical in format to traditional .debug_str +// sections. +void DwarfDebug::emitDebugStrDWO() { + assert(useSplitDwarf() && "No split dwarf?"); + const MCSection *OffSec = Asm->getObjFileLowering() + .getDwarfStrOffDWOSection(); + const MCSymbol *StrSym = DwarfStrSectionSym; + InfoHolder.emitStrings(Asm->getObjFileLowering().getDwarfStrDWOSection(), + OffSec, StrSym); +} diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.h b/lib/CodeGen/AsmPrinter/DwarfDebug.h index 61d9a51a5279..81e345e6281d 100644 --- a/lib/CodeGen/AsmPrinter/DwarfDebug.h +++ b/lib/CodeGen/AsmPrinter/DwarfDebug.h @@ -15,15 +15,15 @@ #define CODEGEN_ASMPRINTER_DWARFDEBUG_H__ #include "DIE.h" -#include "llvm/DebugInfo.h" -#include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/LexicalScopes.h" -#include "llvm/MC/MachineLocation.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/StringMap.h" +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/LexicalScopes.h" +#include "llvm/DebugInfo.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/DebugLoc.h" @@ -41,10 +41,10 @@ class DIEAbbrev; class DIE; class DIEBlock; class DIEEntry; +class DwarfDebug; //===----------------------------------------------------------------------===// -/// SrcLineInfo - This class is used to record source line correspondence. -/// +/// \brief This class is used to record source line correspondence. class SrcLineInfo { unsigned Line; // Source line number. unsigned Column; // Source column. @@ -61,8 +61,8 @@ public: MCSymbol *getLabel() const { return Label; } }; -/// DotDebugLocEntry - This struct describes location entries emitted in -/// .debug_loc section. +/// \brief This struct describes location entries emitted in the .debug_loc +/// section. 
typedef struct DotDebugLocEntry { const MCSymbol *Begin; const MCSymbol *End; @@ -83,25 +83,25 @@ typedef struct DotDebugLocEntry { const ConstantFP *CFP; const ConstantInt *CIP; } Constants; - DotDebugLocEntry() - : Begin(0), End(0), Variable(0), Merged(false), + DotDebugLocEntry() + : Begin(0), End(0), Variable(0), Merged(false), Constant(false) { Constants.Int = 0;} DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, MachineLocation &L, - const MDNode *V) - : Begin(B), End(E), Loc(L), Variable(V), Merged(false), + const MDNode *V) + : Begin(B), End(E), Loc(L), Variable(V), Merged(false), Constant(false) { Constants.Int = 0; EntryKind = E_Location; } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, int64_t i) - : Begin(B), End(E), Variable(0), Merged(false), + : Begin(B), End(E), Variable(0), Merged(false), Constant(true) { Constants.Int = i; EntryKind = E_Integer; } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantFP *FPtr) - : Begin(B), End(E), Variable(0), Merged(false), + : Begin(B), End(E), Variable(0), Merged(false), Constant(true) { Constants.CFP = FPtr; EntryKind = E_ConstantFP; } DotDebugLocEntry(const MCSymbol *B, const MCSymbol *E, const ConstantInt *IPtr) - : Begin(B), End(E), Variable(0), Merged(false), + : Begin(B), End(E), Variable(0), Merged(false), Constant(true) { Constants.CIP = IPtr; EntryKind = E_ConstantInt; } - /// Empty entries are also used as a trigger to emit temp label. Such + /// \brief Empty entries are also used as a trigger to emit temp label. Such /// labels are referenced is used to find debug_loc offset for a given DIE. bool isEmpty() { return Begin == 0 && End == 0; } bool isMerged() { return Merged; } @@ -121,8 +121,7 @@ typedef struct DotDebugLocEntry { } DotDebugLocEntry; //===----------------------------------------------------------------------===// -/// DbgVariable - This class is used to track local variable information. -/// +/// \brief This class is used to track local variable information. class DbgVariable { DIVariable Var; // Variable Descriptor. DIE *TheDIE; // Variable DIE. @@ -132,7 +131,7 @@ class DbgVariable { int FrameIndex; public: // AbsVar may be NULL. - DbgVariable(DIVariable V, DbgVariable *AV) + DbgVariable(DIVariable V, DbgVariable *AV) : Var(V), TheDIE(0), DotDebugLocOffset(~0U), AbsVar(AV), MInsn(0), FrameIndex(~0) {} @@ -148,14 +147,14 @@ public: void setMInsn(const MachineInstr *M) { MInsn = M; } int getFrameIndex() const { return FrameIndex; } void setFrameIndex(int FI) { FrameIndex = FI; } - // Translate tag to proper Dwarf tag. - unsigned getTag() const { + // Translate tag to proper Dwarf tag. + unsigned getTag() const { if (Var.getTag() == dwarf::DW_TAG_arg_variable) return dwarf::DW_TAG_formal_parameter; - + return dwarf::DW_TAG_variable; } - /// isArtificial - Return true if DbgVariable is artificial. + /// \brief Return true if DbgVariable is artificial. 
bool isArtificial() const { if (Var.isArtificial()) return true; @@ -171,7 +170,7 @@ public: return true; return false; } - + bool variableHasComplexAddress() const { assert(Var.Verify() && "Invalid complex DbgVariable!"); return Var.hasComplexAddress(); @@ -180,7 +179,7 @@ public: assert(Var.Verify() && "Invalid complex DbgVariable!"); return Var.isBlockByrefVariable(); } - unsigned getNumAddrElements() const { + unsigned getNumAddrElements() const { assert(Var.Verify() && "Invalid complex DbgVariable!"); return Var.getNumAddrElements(); } @@ -190,108 +189,192 @@ public: DIType getType() const; }; + +// A String->Symbol mapping of strings used by indirect +// references. +typedef StringMap, + BumpPtrAllocator&> StrPool; + +// A Symbol->pair mapping of addresses used by indirect +// references. +typedef DenseMap > AddrPool; + +/// \brief Collects and handles information specific to a particular +/// collection of units. +class DwarfUnits { + // Target of Dwarf emission, used for sizing of abbreviations. + AsmPrinter *Asm; + + // Used to uniquely define abbreviations. + FoldingSet *AbbreviationsSet; + + // A list of all the unique abbreviations in use. + std::vector *Abbreviations; + + // A pointer to all units in the section. + SmallVector CUs; + + // Collection of strings for this unit and assorted symbols. + StrPool StringPool; + unsigned NextStringPoolNumber; + std::string StringPref; + + // Collection of addresses for this unit and assorted labels. + AddrPool AddressPool; + unsigned NextAddrPoolNumber; + +public: + DwarfUnits(AsmPrinter *AP, FoldingSet *AS, + std::vector *A, const char *Pref, + BumpPtrAllocator &DA) : + Asm(AP), AbbreviationsSet(AS), Abbreviations(A), + StringPool(DA), NextStringPoolNumber(0), StringPref(Pref), + AddressPool(), NextAddrPoolNumber(0) {} + + /// \brief Compute the size and offset of a DIE given an incoming Offset. + unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); + + /// \brief Compute the size and offset of all the DIEs. + void computeSizeAndOffsets(); + + /// \brief Define a unique number for the abbreviation. + void assignAbbrevNumber(DIEAbbrev &Abbrev); + + /// \brief Add a unit to the list of CUs. + void addUnit(CompileUnit *CU) { CUs.push_back(CU); } + + /// \brief Emit all of the units to the section listed with the given + /// abbreviation section. + void emitUnits(DwarfDebug *, const MCSection *, const MCSection *, + const MCSymbol *); + + /// \brief Emit all of the strings to the section given. + void emitStrings(const MCSection *, const MCSection *, const MCSymbol *); + + /// \brief Emit all of the addresses to the section given. + void emitAddresses(const MCSection *); + + /// \brief Returns the entry into the start of the pool. + MCSymbol *getStringPoolSym(); + + /// \brief Returns an entry into the string pool with the given + /// string text. + MCSymbol *getStringPoolEntry(StringRef Str); + + /// \brief Returns the index into the string pool with the given + /// string text. + unsigned getStringPoolIndex(StringRef Str); + + /// \brief Returns the string pool. + StrPool *getStringPool() { return &StringPool; } + + /// \brief Returns the index into the address pool with the given + /// label/symbol. + unsigned getAddrPoolIndex(MCSymbol *); + + /// \brief Returns the address pool. + AddrPool *getAddrPool() { return &AddressPool; } + + /// \brief for a given compile unit DIE, returns offset from beginning of + /// debug info. + unsigned getCUOffset(DIE *Die); +}; + +/// \brief Collects and handles dwarf debug information. 
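
getStringPoolIndex and getAddrPoolIndex above hand out a small integer the first time a string or address label is seen and return the same one on later lookups. A minimal sketch of that get-or-create pattern with an ordinary map (hypothetical code, not the StrPool/AddrPool types themselves):

#include <cstdio>
#include <map>
#include <string>

// Hypothetical stand-in for an indexed pool: first use assigns the next
// free index, later uses return the existing one.
struct IndexPool {
  std::map<std::string, unsigned> Index;
  unsigned Next = 0;
  unsigned getOrCreate(const std::string &Key) {
    auto It = Index.find(Key);
    if (It != Index.end())
      return It->second;
    unsigned Id = Next++;
    Index[Key] = Id;
    return Id;
  }
};

int main() {
  IndexPool P;
  unsigned A = P.getOrCreate("_Z3foov");
  unsigned B = P.getOrCreate("_Z3barv");
  unsigned C = P.getOrCreate("_Z3foov"); // same index as the first lookup
  std::printf("%u %u %u\n", A, B, C);    // prints "0 1 0"
  return 0;
}
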
class DwarfDebug { - /// Asm - Target of Dwarf emission. + // Target of Dwarf emission. AsmPrinter *Asm; - /// MMI - Collected machine module information. + // Collected machine module information. MachineModuleInfo *MMI; - /// DIEValueAllocator - All DIEValues are allocated through this allocator. + // All DIEValues are allocated through this allocator. BumpPtrAllocator DIEValueAllocator; //===--------------------------------------------------------------------===// - // Attributes used to construct specific Dwarf sections. + // Attribute used to construct specific Dwarf sections. // CompileUnit *FirstCU; - /// Maps MDNode with its corresponding CompileUnit. + // Maps MDNode with its corresponding CompileUnit. DenseMap CUMap; - /// Maps subprogram MDNode with its corresponding CompileUnit. + // Maps subprogram MDNode with its corresponding CompileUnit. DenseMap SPMap; - /// AbbreviationsSet - Used to uniquely define abbreviations. - /// + // Used to uniquely define abbreviations. FoldingSet AbbreviationsSet; - /// Abbreviations - A list of all the unique abbreviations in use. - /// + // A list of all the unique abbreviations in use. std::vector Abbreviations; - /// SourceIdMap - Source id map, i.e. pair of source filename and directory, - /// separated by a zero byte, mapped to a unique id. + // Stores the current file ID for a given compile unit. + DenseMap FileIDCUMap; + // Source id map, i.e. CUID, source filename and directory, + // separated by a zero byte, mapped to a unique id. StringMap SourceIdMap; - /// StringPool - A String->Symbol mapping of strings used by indirect - /// references. - StringMap, BumpPtrAllocator&> StringPool; - unsigned NextStringPoolNumber; - - /// SectionMap - Provides a unique id per text section. - /// + // Provides a unique id per text section. SetVector SectionMap; - /// CurrentFnArguments - List of Arguments (DbgValues) for current function. + // List of Arguments (DbgValues) for current function. SmallVector CurrentFnArguments; LexicalScopes LScopes; - /// AbstractSPDies - Collection of abstract subprogram DIEs. + // Collection of abstract subprogram DIEs. DenseMap AbstractSPDies; - /// ScopeVariables - Collection of dbg variables of a scope. + // Collection of dbg variables of a scope. DenseMap > ScopeVariables; - /// AbstractVariables - Collection of abstract variables. + // Collection of abstract variables. DenseMap AbstractVariables; - /// DotDebugLocEntries - Collection of DotDebugLocEntry. + // Collection of DotDebugLocEntry. SmallVector DotDebugLocEntries; - /// InlinedSubprogramDIEs - Collection of subprogram DIEs that are marked - /// (at the end of the module) as DW_AT_inline. + // Collection of subprogram DIEs that are marked (at the end of the module) + // as DW_AT_inline. SmallPtrSet InlinedSubprogramDIEs; - /// InlineInfo - Keep track of inlined functions and their location. This - /// information is used to populate the debug_inlined section. + // Keep track of inlined functions and their location. This + // information is used to populate the debug_inlined section. typedef std::pair InlineInfoLabels; DenseMap > InlineInfo; SmallVector InlinedSPNodes; - // ProcessedSPNodes - This is a collection of subprogram MDNodes that - // are processed to create DIEs. + // This is a collection of subprogram MDNodes that are processed to + // create DIEs. SmallPtrSet ProcessedSPNodes; - /// LabelsBeforeInsn - Maps instruction with label emitted before - /// instruction. + // Maps instruction with label emitted before instruction. 
DenseMap LabelsBeforeInsn; - /// LabelsAfterInsn - Maps instruction with label emitted after - /// instruction. + // Maps instruction with label emitted after instruction. DenseMap LabelsAfterInsn; - /// UserVariables - Every user variable mentioned by a DBG_VALUE instruction - /// in order of appearance. + // Every user variable mentioned by a DBG_VALUE instruction in order of + // appearance. SmallVector UserVariables; - /// DbgValues - For each user variable, keep a list of DBG_VALUE - /// instructions in order. The list can also contain normal instructions that - /// clobber the previous DBG_VALUE. + // For each user variable, keep a list of DBG_VALUE instructions in order. + // The list can also contain normal instructions that clobber the previous + // DBG_VALUE. typedef DenseMap > DbgValueHistoryMap; DbgValueHistoryMap DbgValues; SmallVector DebugRangeSymbols; - /// Previous instruction's location information. This is used to determine - /// label location to indicate scope boundries in dwarf debug info. + // Previous instruction's location information. This is used to determine + // label location to indicate scope boundries in dwarf debug info. DebugLoc PrevInstLoc; MCSymbol *PrevLabel; - /// PrologEndLoc - This location indicates end of function prologue and - /// beginning of function body. + // This location indicates end of function prologue and beginning of function + // body. DebugLoc PrologEndLoc; struct FunctionDebugFrameInfo { @@ -309,180 +392,208 @@ class DwarfDebug { // section offsets and are created by EmitSectionLabels. MCSymbol *DwarfInfoSectionSym, *DwarfAbbrevSectionSym; MCSymbol *DwarfStrSectionSym, *TextSectionSym, *DwarfDebugRangeSectionSym; - MCSymbol *DwarfDebugLocSectionSym; + MCSymbol *DwarfDebugLocSectionSym, *DwarfLineSectionSym; MCSymbol *FunctionBeginSym, *FunctionEndSym; + MCSymbol *DwarfAbbrevDWOSectionSym, *DwarfStrDWOSectionSym; // As an optimization, there is no need to emit an entry in the directory // table for the same directory as DW_at_comp_dir. StringRef CompilationDir; - // A holder for the DarwinGDBCompat flag so that the compile unit can use it. - bool isDarwinGDBCompat; - bool hasDwarfAccelTables; -private: + // Counter for assigning globally unique IDs for CUs. + unsigned GlobalCUIndexCount; - /// assignAbbrevNumber - Define a unique number for the abbreviation. - /// - void assignAbbrevNumber(DIEAbbrev &Abbrev); + // Holder for the file specific debug information. + DwarfUnits InfoHolder; + + // Holders for the various debug information flags that we might need to + // have exposed. See accessor functions below for description. + + // Whether or not we're emitting info for older versions of gdb on darwin. + bool IsDarwinGDBCompat; + + // DWARF5 Experimental Options + bool HasDwarfAccelTables; + bool HasSplitDwarf; + + // Separated Dwarf Variables + // In general these will all be for bits that are left in the + // original object file, rather than things that are meant + // to be in the .dwo sections. + + // The CUs left in the original object file for separated debug info. + SmallVector SkeletonCUs; + + // Used to uniquely define abbreviations for the skeleton emission. + FoldingSet SkeletonAbbrevSet; + + // A list of all the unique abbreviations in use. + std::vector SkeletonAbbrevs; + + // Holder for the skeleton information. + DwarfUnits SkeletonHolder; + +private: void addScopeVariable(LexicalScope *LS, DbgVariable *Var); - /// findAbstractVariable - Find abstract variable associated with Var. 
+ /// \brief Find abstract variable associated with Var. DbgVariable *findAbstractVariable(DIVariable &Var, DebugLoc Loc); - /// updateSubprogramScopeDIE - Find DIE for the given subprogram and - /// attach appropriate DW_AT_low_pc and DW_AT_high_pc attributes. - /// If there are global variables in this scope then create and insert - /// DIEs for these variables. + /// \brief Find DIE for the given subprogram and attach appropriate + /// DW_AT_low_pc and DW_AT_high_pc attributes. If there are global + /// variables in this scope then create and insert DIEs for these + /// variables. DIE *updateSubprogramScopeDIE(CompileUnit *SPCU, const MDNode *SPNode); - /// constructLexicalScope - Construct new DW_TAG_lexical_block - /// for this scope and attach DW_AT_low_pc/DW_AT_high_pc labels. + /// \brief Construct new DW_TAG_lexical_block for this scope and + /// attach DW_AT_low_pc/DW_AT_high_pc labels. DIE *constructLexicalScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// constructInlinedScopeDIE - This scope represents inlined body of - /// a function. Construct DIE to represent this concrete inlined copy - /// of the function. + /// \brief This scope represents inlined body of a function. Construct + /// DIE to represent this concrete inlined copy of the function. DIE *constructInlinedScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// constructScopeDIE - Construct a DIE for this scope. + /// \brief Construct a DIE for this scope. DIE *constructScopeDIE(CompileUnit *TheCU, LexicalScope *Scope); - /// EmitSectionLabels - Emit initial Dwarf sections with a label at - /// the start of each one. - void EmitSectionLabels(); + /// \brief Emit initial Dwarf sections with a label at the start of each one. + void emitSectionLabels(); - /// emitDIE - Recursively Emits a debug information entry. - /// - void emitDIE(DIE *Die); + /// \brief Compute the size and offset of a DIE given an incoming Offset. + unsigned computeSizeAndOffset(DIE *Die, unsigned Offset); - /// computeSizeAndOffset - Compute the size and offset of a DIE. - /// - unsigned computeSizeAndOffset(DIE *Die, unsigned Offset, bool Last); - - /// computeSizeAndOffsets - Compute the size and offset of all the DIEs. - /// + /// \brief Compute the size and offset of all the DIEs. void computeSizeAndOffsets(); - /// EmitDebugInfo - Emit the debug info section. - /// + /// \brief Attach DW_AT_inline attribute with inlined subprogram DIEs. + void computeInlinedDIEs(); + + /// \brief Collect info for variables that were optimized out. + void collectDeadVariables(); + + /// \brief Finish off debug information after all functions have been + /// processed. + void finalizeModuleInfo(); + + /// \brief Emit labels to close any remaining sections that have been left + /// open. + void endSections(); + + /// \brief Emit a set of abbreviations to the specific section. + void emitAbbrevs(const MCSection *, std::vector *); + + /// \brief Emit the debug info section. void emitDebugInfo(); - /// emitAbbreviations - Emit the abbreviation section. - /// - void emitAbbreviations() const; + /// \brief Emit the abbreviation section. + void emitAbbreviations(); - /// emitEndOfLineMatrix - Emit the last address of the section and the end of + /// \brief Emit the last address of the section and the end of /// the line matrix. - /// void emitEndOfLineMatrix(unsigned SectionEnd); - /// emitAccelNames - Emit visible names into a hashed accelerator table - /// section. + /// \brief Emit visible names into a hashed accelerator table section. 
void emitAccelNames(); - - /// emitAccelObjC - Emit objective C classes and categories into a hashed + + /// \brief Emit objective C classes and categories into a hashed /// accelerator table section. void emitAccelObjC(); - /// emitAccelNamespace - Emit namespace dies into a hashed accelerator - /// table. + /// \brief Emit namespace dies into a hashed accelerator table. void emitAccelNamespaces(); - /// emitAccelTypes() - Emit type dies into a hashed accelerator table. - /// + /// \brief Emit type dies into a hashed accelerator table. void emitAccelTypes(); - - /// emitDebugPubTypes - Emit visible types into a debug pubtypes section. - /// + + /// \brief Emit visible names into a debug pubnames section. + void emitDebugPubnames(); + + /// \brief Emit visible types into a debug pubtypes section. void emitDebugPubTypes(); - /// emitDebugStr - Emit visible names into a debug str section. - /// + /// \brief Emit visible names into a debug str section. void emitDebugStr(); - /// emitDebugLoc - Emit visible names into a debug loc section. - /// + /// \brief Emit visible names into a debug loc section. void emitDebugLoc(); - /// EmitDebugARanges - Emit visible names into a debug aranges section. - /// - void EmitDebugARanges(); + /// \brief Emit visible names into a debug aranges section. + void emitDebugARanges(); - /// emitDebugRanges - Emit visible names into a debug ranges section. - /// + /// \brief Emit visible names into a debug ranges section. void emitDebugRanges(); - /// emitDebugMacInfo - Emit visible names into a debug macinfo section. - /// + /// \brief Emit visible names into a debug macinfo section. void emitDebugMacInfo(); - /// emitDebugInlineInfo - Emit inline info using following format. - /// Section Header: - /// 1. length of section - /// 2. Dwarf version number - /// 3. address size. - /// - /// Entries (one "entry" for each function that was inlined): - /// - /// 1. offset into __debug_str section for MIPS linkage name, if exists; - /// otherwise offset into __debug_str for regular function name. - /// 2. offset into __debug_str section for regular function name. - /// 3. an unsigned LEB128 number indicating the number of distinct inlining - /// instances for the function. - /// - /// The rest of the entry consists of a {die_offset, low_pc} pair for each - /// inlined instance; the die_offset points to the inlined_subroutine die in - /// the __debug_info section, and the low_pc is the starting address for the - /// inlining instance. + /// \brief Emit inline info using custom format. void emitDebugInlineInfo(); - /// constructCompileUnit - Create new CompileUnit for the given - /// metadata node with tag DW_TAG_compile_unit. + /// DWARF 5 Experimental Split Dwarf Emitters + + /// \brief Construct the split debug info compile unit for the debug info + /// section. + CompileUnit *constructSkeletonCU(const MDNode *); + + /// \brief Emit the local split abbreviations. + void emitSkeletonAbbrevs(const MCSection *); + + /// \brief Emit the debug info dwo section. + void emitDebugInfoDWO(); + + /// \brief Emit the debug abbrev dwo section. + void emitDebugAbbrevDWO(); + + /// \brief Emit the debug str dwo section. + void emitDebugStrDWO(); + + /// \brief Create new CompileUnit for the given metadata node with tag + /// DW_TAG_compile_unit. CompileUnit *constructCompileUnit(const MDNode *N); - /// construct SubprogramDIE - Construct subprogram DIE. + /// \brief Construct subprogram DIE. 
void constructSubprogramDIE(CompileUnit *TheCU, const MDNode *N); - /// recordSourceLine - Register a source line with debug info. Returns the - /// unique label that was emitted and which provides correspondence to - /// the source line list. + /// \brief Register a source line with debug info. Returns the unique + /// label that was emitted and which provides correspondence to the + /// source line list. void recordSourceLine(unsigned Line, unsigned Col, const MDNode *Scope, unsigned Flags); - - /// identifyScopeMarkers() - Indentify instructions that are marking the - /// beginning of or ending of a scope. + + /// \brief Indentify instructions that are marking the beginning of or + /// ending of a scope. void identifyScopeMarkers(); - /// addCurrentFnArgument - If Var is an current function argument that add - /// it in CurrentFnArguments list. + /// \brief If Var is an current function argument that add it in + /// CurrentFnArguments list. bool addCurrentFnArgument(const MachineFunction *MF, DbgVariable *Var, LexicalScope *Scope); - /// collectVariableInfo - Populate LexicalScope entries with variables' info. + /// \brief Populate LexicalScope entries with variables' info. void collectVariableInfo(const MachineFunction *, SmallPtrSet &ProcessedVars); - - /// collectVariableInfoFromMMITable - Collect variable information from - /// side table maintained by MMI. + + /// \brief Collect variable information from the side table maintained + /// by MMI. void collectVariableInfoFromMMITable(const MachineFunction * MF, SmallPtrSet &P); - /// requestLabelBeforeInsn - Ensure that a label will be emitted before MI. + /// \brief Ensure that a label will be emitted before MI. void requestLabelBeforeInsn(const MachineInstr *MI) { LabelsBeforeInsn.insert(std::make_pair(MI, (MCSymbol*)0)); } - /// getLabelBeforeInsn - Return Label preceding the instruction. - const MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); + /// \brief Return Label preceding the instruction. + MCSymbol *getLabelBeforeInsn(const MachineInstr *MI); - /// requestLabelAfterInsn - Ensure that a label will be emitted after MI. + /// \brief Ensure that a label will be emitted after MI. void requestLabelAfterInsn(const MachineInstr *MI) { LabelsAfterInsn.insert(std::make_pair(MI, (MCSymbol*)0)); } - /// getLabelAfterInsn - Return Label immediately following the instruction. - const MCSymbol *getLabelAfterInsn(const MachineInstr *MI); + /// \brief Return Label immediately following the instruction. + MCSymbol *getLabelAfterInsn(const MachineInstr *MI); public: //===--------------------------------------------------------------------===// @@ -491,52 +602,47 @@ public: DwarfDebug(AsmPrinter *A, Module *M); ~DwarfDebug(); - /// collectInfoFromNamedMDNodes - Collect debug info from named mdnodes such - /// as llvm.dbg.enum and llvm.dbg.ty - void collectInfoFromNamedMDNodes(Module *M); - - /// collectLegacyDebugInfo - Collect debug info using DebugInfoFinder. - /// FIXME - Remove this when DragonEgg switches to DIBuilder. - bool collectLegacyDebugInfo(Module *M); - - /// beginModule - Emit all Dwarf sections that should come prior to the + /// \brief Emit all Dwarf sections that should come prior to the /// content. - void beginModule(Module *M); + void beginModule(); - /// endModule - Emit all Dwarf sections that should come after the content. - /// + /// \brief Emit all Dwarf sections that should come after the content. void endModule(); - /// beginFunction - Gather pre-function debug information. 
Assumes being - /// emitted immediately after the function entry point. + /// \brief Gather pre-function debug information. void beginFunction(const MachineFunction *MF); - /// endFunction - Gather and emit post-function debug information. - /// + /// \brief Gather and emit post-function debug information. void endFunction(const MachineFunction *MF); - /// beginInstruction - Process beginning of an instruction. + /// \brief Process beginning of an instruction. void beginInstruction(const MachineInstr *MI); - /// endInstruction - Prcess end of an instruction. + /// \brief Process end of an instruction. void endInstruction(const MachineInstr *MI); - /// GetOrCreateSourceID - Look up the source id with the given directory and - /// source file names. If none currently exists, create a new id and insert it - /// in the SourceIds map. - unsigned GetOrCreateSourceID(StringRef DirName, StringRef FullName); - - /// getStringPool - returns the entry into the start of the pool. - MCSymbol *getStringPool(); + /// \brief Look up the source id with the given directory and source file + /// names. If none currently exists, create a new id and insert it in the + /// SourceIds map. + unsigned getOrCreateSourceID(StringRef DirName, StringRef FullName, + unsigned CUID); - /// getStringPoolEntry - returns an entry into the string pool with the given - /// string text. - MCSymbol *getStringPoolEntry(StringRef Str); + /// \brief Recursively Emits a debug information entry. + void emitDIE(DIE *Die, std::vector *Abbrevs); - /// useDarwinGDBCompat - returns whether or not to limit some of our debug + /// \brief Returns whether or not to limit some of our debug /// output to the limitations of darwin gdb. - bool useDarwinGDBCompat() { return isDarwinGDBCompat; } - bool useDwarfAccelTables() { return hasDwarfAccelTables; } + bool useDarwinGDBCompat() { return IsDarwinGDBCompat; } + + // Experimental DWARF5 features. + + /// \brief Returns whether or not to emit tables that dwarf consumers can + /// use to accelerate lookup. + bool useDwarfAccelTables() { return HasDwarfAccelTables; } + + /// \brief Returns whether or not to change the current debug info for the + /// split dwarf proposal support. 
+ bool useSplitDwarf() { return HasSplitDwarf; } }; } // End of namespace llvm diff --git a/lib/CodeGen/AsmPrinter/DwarfException.cpp b/lib/CodeGen/AsmPrinter/DwarfException.cpp index 08fb6b3f52c5..7133458129cc 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfException.cpp @@ -12,30 +12,29 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" using namespace llvm; DwarfException::DwarfException(AsmPrinter *A) @@ -608,7 +607,7 @@ void DwarfException::EmitExceptionTable() { if (!S.PadLabel) { if (VerboseAsm) Asm->OutStreamer.AddComment(" has no landing pad"); - Asm->OutStreamer.EmitIntValue(0, 4/*size*/, 0/*addrspace*/); + Asm->OutStreamer.EmitIntValue(0, 4/*size*/); } else { if (VerboseAsm) Asm->OutStreamer.AddComment(Twine(" jumps to ") + @@ -672,6 +671,18 @@ void DwarfException::EmitExceptionTable() { Asm->EmitSLEB128(Action.NextAction); } + EmitTypeInfos(TTypeEncoding); + + Asm->EmitAlignment(2); +} + +void DwarfException::EmitTypeInfos(unsigned TTypeEncoding) { + const std::vector &TypeInfos = MMI->getTypeInfos(); + const std::vector &FilterIds = MMI->getFilterIds(); + + bool VerboseAsm = Asm->OutStreamer.isVerboseAsm(); + + int Entry = 0; // Emit the Catch TypeInfos. if (VerboseAsm && !TypeInfos.empty()) { Asm->OutStreamer.AddComment(">> Catch TypeInfos <<"); @@ -684,11 +695,7 @@ void DwarfException::EmitExceptionTable() { const GlobalVariable *GV = *I; if (VerboseAsm) Asm->OutStreamer.AddComment("TypeInfo " + Twine(Entry--)); - if (GV) - Asm->EmitReference(GV, TTypeEncoding); - else - Asm->OutStreamer.EmitIntValue(0,Asm->GetSizeOfEncodedValue(TTypeEncoding), - 0); + Asm->EmitTTypeReference(GV, TTypeEncoding); } // Emit the Exception Specifications. @@ -708,8 +715,6 @@ void DwarfException::EmitExceptionTable() { Asm->EmitULEB128(TypeID); } - - Asm->EmitAlignment(2); } /// EndModule - Emit all exception information that should come after the diff --git a/lib/CodeGen/AsmPrinter/DwarfException.h b/lib/CodeGen/AsmPrinter/DwarfException.h index fe9e49360951..74b1b13367a2 100644 --- a/lib/CodeGen/AsmPrinter/DwarfException.h +++ b/lib/CodeGen/AsmPrinter/DwarfException.h @@ -121,6 +121,8 @@ protected: /// catches in the function. This tables is reversed indexed base 1. 
void EmitExceptionTable(); + virtual void EmitTypeInfos(unsigned TTypeEncoding); + public: //===--------------------------------------------------------------------===// // Main entry points. @@ -175,6 +177,7 @@ public: }; class ARMException : public DwarfException { + void EmitTypeInfos(unsigned TTypeEncoding); public: //===--------------------------------------------------------------------===// // Main entry points. diff --git a/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp new file mode 100644 index 000000000000..a8fb66dcf17b --- /dev/null +++ b/lib/CodeGen/AsmPrinter/ErlangGCPrinter.cpp @@ -0,0 +1,120 @@ +//===-- ErlangGCPrinter.cpp - Erlang/OTP frametable emitter -----*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the compiler plugin that is used in order to emit +// garbage collection information in a convenient layout for parsing and +// loading in the Erlang/OTP runtime. +// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/AsmPrinter.h" +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCMetadataPrinter.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instruction.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Metadata.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + + class ErlangGCPrinter : public GCMetadataPrinter { + public: + void beginAssembly(AsmPrinter &AP); + void finishAssembly(AsmPrinter &AP); + }; + +} + +static GCMetadataPrinterRegistry::Add +X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGCPrinter() { } + +void ErlangGCPrinter::beginAssembly(AsmPrinter &AP) { } + +void ErlangGCPrinter::finishAssembly(AsmPrinter &AP) { + MCStreamer &OS = AP.OutStreamer; + unsigned IntPtrSize = AP.TM.getDataLayout()->getPointerSize(); + + // Put this in a custom .note section. + AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getContext() + .getELFSection(".note.gc", ELF::SHT_PROGBITS, 0, + SectionKind::getDataRel())); + + // For each function... + for (iterator FI = begin(), FE = end(); FI != FE; ++FI) { + GCFunctionInfo &MD = **FI; + + /** A compact GC layout. Emit this data structure: + * + * struct { + * int16_t PointCount; + * void *SafePointAddress[PointCount]; + * int16_t StackFrameSize; (in words) + * int16_t StackArity; + * int16_t LiveCount; + * int16_t LiveOffsets[LiveCount]; + * } __gcmap_; + **/ + + // Align to address width. + AP.EmitAlignment(IntPtrSize == 4 ? 2 : 3); + + // Emit PointCount. + OS.AddComment("safe point count"); + AP.EmitInt16(MD.size()); + + // And each safe point... + for (GCFunctionInfo::iterator PI = MD.begin(), PE = MD.end(); PI != PE; + ++PI) { + // Emit the address of the safe point. + OS.AddComment("safe point address"); + MCSymbol *Label = PI->Label; + AP.EmitLabelPlusOffset(Label/*Hi*/, 0/*Offset*/, 4/*Size*/); + } + + // Stack information never change in safe points! Only print info from the + // first call-site. 
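For illustration only, a minimal consumer-side sketch of one __gcmap_ record as laid out above; the GCMapRecord/readGCMap names are hypothetical (they are not part of the Erlang/OTP runtime), and 32-bit safe point addresses with no inter-record alignment padding are assumed for brevity:

  #include <cstdint>
  #include <cstring>
  #include <vector>

  struct GCMapRecord {
    std::vector<uint32_t> SafePoints;  // one address per safe point
    uint16_t FrameSizeInWords;
    uint16_t StackArity;
    std::vector<uint16_t> LiveOffsets; // live root slots, in words
  };

  static uint16_t read16(const uint8_t *&P) {
    uint16_t V;
    std::memcpy(&V, P, 2);
    P += 2;
    return V;
  }

  static GCMapRecord readGCMap(const uint8_t *&P) {
    GCMapRecord R;
    uint16_t PointCount = read16(P);
    for (uint16_t i = 0; i != PointCount; ++i) {
      uint32_t Addr;
      std::memcpy(&Addr, P, 4);
      P += 4;
      R.SafePoints.push_back(Addr);
    }
    R.FrameSizeInWords = read16(P);
    R.StackArity = read16(P);
    uint16_t LiveCount = read16(P);
    for (uint16_t i = 0; i != LiveCount; ++i)
      R.LiveOffsets.push_back(read16(P));
    return R;
  }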
+ GCFunctionInfo::iterator PI = MD.begin(); + + // Emit the stack frame size. + OS.AddComment("stack frame size (in words)"); + AP.EmitInt16(MD.getFrameSize() / IntPtrSize); + + // Emit stack arity, i.e. the number of stacked arguments. + unsigned RegisteredArgs = IntPtrSize == 4 ? 5 : 6; + unsigned StackArity = MD.getFunction().arg_size() > RegisteredArgs ? + MD.getFunction().arg_size() - RegisteredArgs : 0; + OS.AddComment("stack arity"); + AP.EmitInt16(StackArity); + + // Emit the number of live roots in the function. + OS.AddComment("live root count"); + AP.EmitInt16(MD.live_size(PI)); + + // And for each live root... + for (GCFunctionInfo::live_iterator LI = MD.live_begin(PI), + LE = MD.live_end(PI); + LI != LE; ++LI) { + // Emit live root's offset within the stack frame. + OS.AddComment("stack index (offset / wordsize)"); + AP.EmitInt16(LI->StackOffset / IntPtrSize); + } + } +} diff --git a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp index f7c011968c23..98177c0ba1cf 100644 --- a/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/OcamlGCPrinter.cpp @@ -12,20 +12,20 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCs.h" +#include "llvm/ADT/SmallString.h" #include "llvm/CodeGen/AsmPrinter.h" #include "llvm/CodeGen/GCMetadataPrinter.h" -#include "llvm/Module.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/MC/MCSymbol.h" #include "llvm/MC/MCStreamer.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLoweringObjectFile.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" #include using namespace llvm; @@ -100,7 +100,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { EmitCamlGlobal(getModule(), AP, "data_end"); // FIXME: Why does ocaml emit this?? 
- AP.OutStreamer.EmitIntValue(0, IntPtrSize, 0); + AP.OutStreamer.EmitIntValue(0, IntPtrSize); AP.OutStreamer.SwitchSection(AP.getObjFileLowering().getDataSection()); EmitCamlGlobal(getModule(), AP, "frametable"); @@ -145,7 +145,7 @@ void OcamlGCMetadataPrinter::finishAssembly(AsmPrinter &AP) { "Live root count "+Twine(LiveCount)+" >= 65536."); } - AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize, 0); + AP.OutStreamer.EmitSymbolValue(J->Label, IntPtrSize); AP.EmitInt16(FrameSize); AP.EmitInt16(LiveCount); diff --git a/lib/CodeGen/AsmPrinter/Win64Exception.cpp b/lib/CodeGen/AsmPrinter/Win64Exception.cpp index 70742a8d2e35..156101286b75 100644 --- a/lib/CodeGen/AsmPrinter/Win64Exception.cpp +++ b/lib/CodeGen/AsmPrinter/Win64Exception.cpp @@ -12,30 +12,29 @@ //===----------------------------------------------------------------------===// #include "DwarfException.h" -#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Twine.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" #include "llvm/MC/MCSection.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Dwarf.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/FormattedStream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Twine.h" using namespace llvm; Win64Exception::Win64Exception(AsmPrinter *A) diff --git a/lib/CodeGen/BasicTargetTransformInfo.cpp b/lib/CodeGen/BasicTargetTransformInfo.cpp new file mode 100644 index 000000000000..012ff8ad8339 --- /dev/null +++ b/lib/CodeGen/BasicTargetTransformInfo.cpp @@ -0,0 +1,466 @@ +//===- BasicTargetTransformInfo.cpp - Basic target-independent TTI impl ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// \file +/// This file provides the implementation of a basic TargetTransformInfo pass +/// predicated on the target abstractions present in the target independent +/// code generator. It uses these (primarily TargetLowering) to model as much +/// of the TTI query interface as possible. It is included by most targets so +/// that they can specialize only a small subset of the query space. 
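As a rough sketch of that layering (the MyTargetTTI name and its 256-bit answer are assumptions for illustration, and registration boilerplate is elided), a target stacks its own immutable pass on top of this basic one and overrides only the queries it can answer more precisely, in the same style as the BasicTTI class below:

  namespace {
  class MyTargetTTI : public ImmutablePass, public TargetTransformInfo {
  public:
    static char ID;
    MyTargetTTI() : ImmutablePass(ID) {}

    virtual void initializePass() { pushTTIStack(this); }
    virtual void finalizePass() { popTTIStack(); }

    virtual void getAnalysisUsage(AnalysisUsage &AU) const {
      TargetTransformInfo::getAnalysisUsage(AU);
    }

    virtual void *getAdjustedAnalysisPointer(const void *ID) {
      if (ID == &TargetTransformInfo::ID)
        return (TargetTransformInfo*)this;
      return this;
    }

    // The single query this hypothetical target refines; every other query
    // falls through to the generic implementations lower on the TTI stack.
    virtual unsigned getRegisterBitWidth(bool Vector) const {
      return Vector ? 256 : 64;
    }
  };
  } // anonymous namespace

  char MyTargetTTI::ID = 0;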
+/// +//===----------------------------------------------------------------------===// + +#define DEBUG_TYPE "basictti" +#include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/TargetTransformInfo.h" +#include "llvm/Target/TargetLowering.h" +#include + +using namespace llvm; + +namespace { + +class BasicTTI : public ImmutablePass, public TargetTransformInfo { + const TargetLoweringBase *TLI; + + /// Estimate the overhead of scalarizing an instruction. Insert and Extract + /// are set if the result needs to be inserted and/or extracted from vectors. + unsigned getScalarizationOverhead(Type *Ty, bool Insert, bool Extract) const; + +public: + BasicTTI() : ImmutablePass(ID), TLI(0) { + llvm_unreachable("This pass cannot be directly constructed"); + } + + BasicTTI(const TargetLoweringBase *TLI) : ImmutablePass(ID), TLI(TLI) { + initializeBasicTTIPass(*PassRegistry::getPassRegistry()); + } + + virtual void initializePass() { + pushTTIStack(this); + } + + virtual void finalizePass() { + popTTIStack(); + } + + virtual void getAnalysisUsage(AnalysisUsage &AU) const { + TargetTransformInfo::getAnalysisUsage(AU); + } + + /// Pass identification. + static char ID; + + /// Provide necessary pointer adjustments for the two base classes. + virtual void *getAdjustedAnalysisPointer(const void *ID) { + if (ID == &TargetTransformInfo::ID) + return (TargetTransformInfo*)this; + return this; + } + + /// \name Scalar TTI Implementations + /// @{ + + virtual bool isLegalAddImmediate(int64_t imm) const; + virtual bool isLegalICmpImmediate(int64_t imm) const; + virtual bool isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const; + virtual bool isTruncateFree(Type *Ty1, Type *Ty2) const; + virtual bool isTypeLegal(Type *Ty) const; + virtual unsigned getJumpBufAlignment() const; + virtual unsigned getJumpBufSize() const; + virtual bool shouldBuildLookupTables() const; + + /// @} + + /// \name Vector TTI Implementations + /// @{ + + virtual unsigned getNumberOfRegisters(bool Vector) const; + virtual unsigned getMaximumUnrollFactor() const; + virtual unsigned getRegisterBitWidth(bool Vector) const; + virtual unsigned getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const; + virtual unsigned getShuffleCost(ShuffleKind Kind, Type *Tp, + int Index, Type *SubTp) const; + virtual unsigned getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const; + virtual unsigned getCFInstrCost(unsigned Opcode) const; + virtual unsigned getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const; + virtual unsigned getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const; + virtual unsigned getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const; + virtual unsigned getIntrinsicInstrCost(Intrinsic::ID, Type *RetTy, + ArrayRef Tys) const; + virtual unsigned getNumberOfParts(Type *Tp) const; + virtual unsigned getAddressComputationCost(Type *Ty) const; + + /// @} +}; + +} + +INITIALIZE_AG_PASS(BasicTTI, TargetTransformInfo, "basictti", + "Target independent code generator's TTI", true, true, false) +char BasicTTI::ID = 0; + +ImmutablePass * +llvm::createBasicTargetTransformInfoPass(const TargetLoweringBase *TLI) { + return new BasicTTI(TLI); +} + + +bool BasicTTI::isLegalAddImmediate(int64_t imm) const { + return TLI->isLegalAddImmediate(imm); +} + +bool BasicTTI::isLegalICmpImmediate(int64_t imm) const { + return TLI->isLegalICmpImmediate(imm); +} + +bool 
BasicTTI::isLegalAddressingMode(Type *Ty, GlobalValue *BaseGV, + int64_t BaseOffset, bool HasBaseReg, + int64_t Scale) const { + TargetLoweringBase::AddrMode AM; + AM.BaseGV = BaseGV; + AM.BaseOffs = BaseOffset; + AM.HasBaseReg = HasBaseReg; + AM.Scale = Scale; + return TLI->isLegalAddressingMode(AM, Ty); +} + +bool BasicTTI::isTruncateFree(Type *Ty1, Type *Ty2) const { + return TLI->isTruncateFree(Ty1, Ty2); +} + +bool BasicTTI::isTypeLegal(Type *Ty) const { + EVT T = TLI->getValueType(Ty); + return TLI->isTypeLegal(T); +} + +unsigned BasicTTI::getJumpBufAlignment() const { + return TLI->getJumpBufAlignment(); +} + +unsigned BasicTTI::getJumpBufSize() const { + return TLI->getJumpBufSize(); +} + +bool BasicTTI::shouldBuildLookupTables() const { + return TLI->supportJumpTables() && + (TLI->isOperationLegalOrCustom(ISD::BR_JT, MVT::Other) || + TLI->isOperationLegalOrCustom(ISD::BRIND, MVT::Other)); +} + +//===----------------------------------------------------------------------===// +// +// Calls used by the vectorizers. +// +//===----------------------------------------------------------------------===// + +unsigned BasicTTI::getScalarizationOverhead(Type *Ty, bool Insert, + bool Extract) const { + assert (Ty->isVectorTy() && "Can only scalarize vectors"); + unsigned Cost = 0; + + for (int i = 0, e = Ty->getVectorNumElements(); i < e; ++i) { + if (Insert) + Cost += TopTTI->getVectorInstrCost(Instruction::InsertElement, Ty, i); + if (Extract) + Cost += TopTTI->getVectorInstrCost(Instruction::ExtractElement, Ty, i); + } + + return Cost; +} + +unsigned BasicTTI::getNumberOfRegisters(bool Vector) const { + return 1; +} + +unsigned BasicTTI::getRegisterBitWidth(bool Vector) const { + return 32; +} + +unsigned BasicTTI::getMaximumUnrollFactor() const { + return 1; +} + +unsigned BasicTTI::getArithmeticInstrCost(unsigned Opcode, Type *Ty, + OperandValueKind, + OperandValueKind) const { + // Check if any of the operands are vector operands. + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + std::pair LT = TLI->getTypeLegalizationCost(Ty); + + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that thre is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + return LT.first * 2; + return LT.first * 1; + } + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // thare the code is twice as expensive. + return LT.first * 2; + } + + // Else, assume that we need to scalarize this op. + if (Ty->isVectorTy()) { + unsigned Num = Ty->getVectorNumElements(); + unsigned Cost = TopTTI->getArithmeticInstrCost(Opcode, Ty->getScalarType()); + // return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(Ty, true, true) + Num * Cost; + } + + // We don't know anything about this scalar instruction. + return 1; +} + +unsigned BasicTTI::getShuffleCost(ShuffleKind Kind, Type *Tp, int Index, + Type *SubTp) const { + return 1; +} + +unsigned BasicTTI::getCastInstrCost(unsigned Opcode, Type *Dst, + Type *Src) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + std::pair SrcLT = TLI->getTypeLegalizationCost(Src); + std::pair DstLT = TLI->getTypeLegalizationCost(Dst); + + // Check for NOOP conversions. 
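  // (Illustrative note, assuming a target where <4 x i32> and <4 x float>
  //  are both legal single-register types: a bitcast between them legalizes
  //  to the same-width MVT with the same part count, so it hits the case
  //  below and is reported as free.)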
+ if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + + // Bitcast between types that are legalized to the same type are free. + if (Opcode == Instruction::BitCast || Opcode == Instruction::Trunc) + return 0; + } + + if (Opcode == Instruction::Trunc && + TLI->isTruncateFree(SrcLT.second, DstLT.second)) + return 0; + + if (Opcode == Instruction::ZExt && + TLI->isZExtFree(SrcLT.second, DstLT.second)) + return 0; + + // If the cast is marked as legal (or promote) then assume low cost. + if (TLI->isOperationLegalOrPromote(ISD, DstLT.second)) + return 1; + + // Handle scalar conversions. + if (!Src->isVectorTy() && !Dst->isVectorTy()) { + + // Scalar bitcasts are usually free. + if (Opcode == Instruction::BitCast) + return 0; + + // Just check the op cost. If the operation is legal then assume it costs 1. + if (!TLI->isOperationExpand(ISD, DstLT.second)) + return 1; + + // Assume that illegal scalar instruction are expensive. + return 4; + } + + // Check vector-to-vector casts. + if (Dst->isVectorTy() && Src->isVectorTy()) { + + // If the cast is between same-sized registers, then the check is simple. + if (SrcLT.first == DstLT.first && + SrcLT.second.getSizeInBits() == DstLT.second.getSizeInBits()) { + + // Assume that Zext is done using AND. + if (Opcode == Instruction::ZExt) + return 1; + + // Assume that sext is done using SHL and SRA. + if (Opcode == Instruction::SExt) + return 2; + + // Just check the op cost. If the operation is legal then assume it costs + // 1 and multiply by the type-legalization overhead. + if (!TLI->isOperationExpand(ISD, DstLT.second)) + return SrcLT.first * 1; + } + + // If we are converting vectors and the operation is illegal, or + // if the vectors are legalized to different types, estimate the + // scalarization costs. + unsigned Num = Dst->getVectorNumElements(); + unsigned Cost = TopTTI->getCastInstrCost(Opcode, Dst->getScalarType(), + Src->getScalarType()); + + // Return the cost of multiple scalar invocation plus the cost of + // inserting and extracting the values. + return getScalarizationOverhead(Dst, true, true) + Num * Cost; + } + + // We already handled vector-to-vector and scalar-to-scalar conversions. This + // is where we handle bitcast between vectors and scalars. We need to assume + // that the conversion is scalarized in one way or another. + if (Opcode == Instruction::BitCast) + // Illegal bitcasts are done by storing and loading from a stack slot. + return (Src->isVectorTy()? getScalarizationOverhead(Src, false, true):0) + + (Dst->isVectorTy()? getScalarizationOverhead(Dst, true, false):0); + + llvm_unreachable("Unhandled cast"); + } + +unsigned BasicTTI::getCFInstrCost(unsigned Opcode) const { + // Branches are assumed to be predicted. + return 0; +} + +unsigned BasicTTI::getCmpSelInstrCost(unsigned Opcode, Type *ValTy, + Type *CondTy) const { + int ISD = TLI->InstructionOpcodeToISD(Opcode); + assert(ISD && "Invalid opcode"); + + // Selects on vectors are actually vector selects. + if (ISD == ISD::SELECT) { + assert(CondTy && "CondTy must exist"); + if (CondTy->isVectorTy()) + ISD = ISD::VSELECT; + } + + std::pair LT = TLI->getTypeLegalizationCost(ValTy); + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. Multiply + // by the type-legalization overhead. + return LT.first * 1; + } + + // Otherwise, assume that the cast is scalarized. 
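  // (Worked example under these defaults: a compare over <4 x i32> becomes
  //  4 scalar compares plus the insertion overhead for ValTy, i.e.
  //  getScalarizationOverhead(ValTy, /*Insert=*/true, /*Extract=*/false)
  //  + 4 * the scalar compare cost; the element count is only an example.)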
+ if (ValTy->isVectorTy()) { + unsigned Num = ValTy->getVectorNumElements(); + if (CondTy) + CondTy = CondTy->getScalarType(); + unsigned Cost = TopTTI->getCmpSelInstrCost(Opcode, ValTy->getScalarType(), + CondTy); + + // Return the cost of multiple scalar invocation plus the cost of inserting + // and extracting the values. + return getScalarizationOverhead(ValTy, true, false) + Num * Cost; + } + + // Unknown scalar opcode. + return 1; +} + +unsigned BasicTTI::getVectorInstrCost(unsigned Opcode, Type *Val, + unsigned Index) const { + return 1; +} + +unsigned BasicTTI::getMemoryOpCost(unsigned Opcode, Type *Src, + unsigned Alignment, + unsigned AddressSpace) const { + assert(!Src->isVoidTy() && "Invalid type"); + std::pair LT = TLI->getTypeLegalizationCost(Src); + + // Assume that all loads of legal types cost 1. + return LT.first; +} + +unsigned BasicTTI::getIntrinsicInstrCost(Intrinsic::ID IID, Type *RetTy, + ArrayRef Tys) const { + unsigned ISD = 0; + switch (IID) { + default: { + // Assume that we need to scalarize this intrinsic. + unsigned ScalarizationCost = 0; + unsigned ScalarCalls = 1; + if (RetTy->isVectorTy()) { + ScalarizationCost = getScalarizationOverhead(RetTy, true, false); + ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); + } + for (unsigned i = 0, ie = Tys.size(); i != ie; ++i) { + if (Tys[i]->isVectorTy()) { + ScalarizationCost += getScalarizationOverhead(Tys[i], false, true); + ScalarCalls = std::max(ScalarCalls, RetTy->getVectorNumElements()); + } + } + + return ScalarCalls + ScalarizationCost; + } + // Look for intrinsics that can be lowered directly or turned into a scalar + // intrinsic call. + case Intrinsic::sqrt: ISD = ISD::FSQRT; break; + case Intrinsic::sin: ISD = ISD::FSIN; break; + case Intrinsic::cos: ISD = ISD::FCOS; break; + case Intrinsic::exp: ISD = ISD::FEXP; break; + case Intrinsic::exp2: ISD = ISD::FEXP2; break; + case Intrinsic::log: ISD = ISD::FLOG; break; + case Intrinsic::log10: ISD = ISD::FLOG10; break; + case Intrinsic::log2: ISD = ISD::FLOG2; break; + case Intrinsic::fabs: ISD = ISD::FABS; break; + case Intrinsic::floor: ISD = ISD::FFLOOR; break; + case Intrinsic::ceil: ISD = ISD::FCEIL; break; + case Intrinsic::trunc: ISD = ISD::FTRUNC; break; + case Intrinsic::rint: ISD = ISD::FRINT; break; + case Intrinsic::pow: ISD = ISD::FPOW; break; + case Intrinsic::fma: ISD = ISD::FMA; break; + case Intrinsic::fmuladd: ISD = ISD::FMA; break; // FIXME: mul + add? + } + + std::pair LT = TLI->getTypeLegalizationCost(RetTy); + + if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { + // The operation is legal. Assume it costs 1. + // If the type is split to multiple registers, assume that thre is some + // overhead to this. + // TODO: Once we have extract/insert subvector cost we need to use them. + if (LT.first > 1) + return LT.first * 2; + return LT.first * 1; + } + + if (!TLI->isOperationExpand(ISD, LT.second)) { + // If the operation is custom lowered then assume + // thare the code is twice as expensive. + return LT.first * 2; + } + + // Else, assume that we need to scalarize this intrinsic. For math builtins + // this will emit a costly libcall, adding call overhead and spills. Make it + // very expensive. + if (RetTy->isVectorTy()) { + unsigned Num = RetTy->getVectorNumElements(); + unsigned Cost = TopTTI->getIntrinsicInstrCost(IID, RetTy->getScalarType(), + Tys); + return 10 * Cost * Num; + } + + // This is going to be turned into a library call, make it expensive. 
+ return 10; +} + +unsigned BasicTTI::getNumberOfParts(Type *Tp) const { + std::pair LT = TLI->getTypeLegalizationCost(Tp); + return LT.first; +} + +unsigned BasicTTI::getAddressComputationCost(Type *Ty) const { + return 0; +} diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 6f4c5a2f667b..f8cc3b3999e8 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -18,24 +18,23 @@ #define DEBUG_TYPE "branchfolding" #include "BranchFolding.h" -#include "llvm/Function.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -571,8 +570,8 @@ static bool ProfitableToMerge(MachineBasicBlock *MBB1, // instructions that would be deleted in the merge. MachineFunction *MF = MBB1->getParent(); if (EffectiveTailLen >= 2 && - MF->getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize) && + MF->getFunction()->getAttributes(). 
+ hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize) && (I1 == MBB1->begin() || I2 == MBB2->begin())) return true; diff --git a/lib/CodeGen/CMakeLists.txt b/lib/CodeGen/CMakeLists.txt index fa6d4e16cfe8..56aa3309d3dd 100644 --- a/lib/CodeGen/CMakeLists.txt +++ b/lib/CodeGen/CMakeLists.txt @@ -2,17 +2,18 @@ add_llvm_library(LLVMCodeGen AggressiveAntiDepBreaker.cpp AllocationOrder.cpp Analysis.cpp + BasicTargetTransformInfo.cpp BranchFolding.cpp CalcSpillWeights.cpp CallingConvLower.cpp CodeGen.cpp - CodePlacementOpt.cpp CriticalAntiDepBreaker.cpp - DeadMachineInstructionElim.cpp DFAPacketizer.cpp + DeadMachineInstructionElim.cpp DwarfEHPrepare.cpp EarlyIfConversion.cpp EdgeBundles.cpp + ErlangGC.cpp ExecutionDepsFix.cpp ExpandISelPseudos.cpp ExpandPostRAPseudos.cpp @@ -31,21 +32,20 @@ add_llvm_library(LLVMCodeGen LiveInterval.cpp LiveIntervalAnalysis.cpp LiveIntervalUnion.cpp + LiveRangeCalc.cpp + LiveRangeEdit.cpp LiveRegMatrix.cpp LiveStackAnalysis.cpp LiveVariables.cpp - LiveRangeCalc.cpp - LiveRangeEdit.cpp LocalStackSlotAllocation.cpp MachineBasicBlock.cpp MachineBlockFrequencyInfo.cpp MachineBlockPlacement.cpp MachineBranchProbabilityInfo.cpp + MachineCSE.cpp MachineCodeEmitter.cpp MachineCopyPropagation.cpp - MachineCSE.cpp MachineDominators.cpp - MachinePostDominators.cpp MachineFunction.cpp MachineFunctionAnalysis.cpp MachineFunctionPass.cpp @@ -54,10 +54,10 @@ add_llvm_library(LLVMCodeGen MachineInstrBundle.cpp MachineLICM.cpp MachineLoopInfo.cpp - MachineLoopRanges.cpp MachineModuleInfo.cpp MachineModuleInfoImpls.cpp MachinePassRegistry.cpp + MachinePostDominators.cpp MachineRegisterInfo.cpp MachineSSAUpdater.cpp MachineScheduler.cpp @@ -91,18 +91,20 @@ add_llvm_library(LLVMCodeGen ShrinkWrapping.cpp SjLjEHPrepare.cpp SlotIndexes.cpp - Spiller.cpp SpillPlacement.cpp + Spiller.cpp SplitKit.cpp + StackColoring.cpp StackProtector.cpp StackSlotColoring.cpp - StackColoring.cpp StrongPHIElimination.cpp TailDuplication.cpp TargetFrameLoweringImpl.cpp - TargetInstrInfoImpl.cpp + TargetInstrInfo.cpp + TargetLoweringBase.cpp TargetLoweringObjectFileImpl.cpp TargetOptionsImpl.cpp + TargetRegisterInfo.cpp TargetSchedule.cpp TwoAddressInstructionPass.cpp UnreachableBlockElim.cpp diff --git a/lib/CodeGen/CallingConvLower.cpp b/lib/CodeGen/CallingConvLower.cpp index 22b91409240b..f1d4ace92273 100644 --- a/lib/CodeGen/CallingConvLower.cpp +++ b/lib/CodeGen/CallingConvLower.cpp @@ -14,13 +14,13 @@ #include "llvm/CodeGen/CallingConvLower.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; CCState::CCState(CallingConv::ID CC, bool isVarArg, MachineFunction &mf, @@ -74,7 +74,7 @@ CCState::AnalyzeFormalArguments(const SmallVectorImpl &Ins, if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Formal argument #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString(); + << EVT(ArgVT).getEVTString() << '\n'; #endif llvm_unreachable(0); } @@ -106,7 +106,7 @@ void CCState::AnalyzeReturn(const SmallVectorImpl &Outs, if (Fn(i, VT, VT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Return operand #" << i << " has unhandled type " - << 
EVT(VT).getEVTString(); + << EVT(VT).getEVTString() << '\n'; #endif llvm_unreachable(0); } @@ -124,7 +124,7 @@ void CCState::AnalyzeCallOperands(const SmallVectorImpl &Outs, if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString(); + << EVT(ArgVT).getEVTString() << '\n'; #endif llvm_unreachable(0); } @@ -143,7 +143,7 @@ void CCState::AnalyzeCallOperands(SmallVectorImpl &ArgVTs, if (Fn(i, ArgVT, ArgVT, CCValAssign::Full, ArgFlags, *this)) { #ifndef NDEBUG dbgs() << "Call operand #" << i << " has unhandled type " - << EVT(ArgVT).getEVTString(); + << EVT(ArgVT).getEVTString() << '\n'; #endif llvm_unreachable(0); } @@ -160,7 +160,7 @@ void CCState::AnalyzeCallResult(const SmallVectorImpl &Ins, if (Fn(i, VT, VT, CCValAssign::Full, Flags, *this)) { #ifndef NDEBUG dbgs() << "Call result #" << i << " has unhandled type " - << EVT(VT).getEVTString() << "\n"; + << EVT(VT).getEVTString() << '\n'; #endif llvm_unreachable(0); } @@ -173,7 +173,7 @@ void CCState::AnalyzeCallResult(MVT VT, CCAssignFn Fn) { if (Fn(0, VT, VT, CCValAssign::Full, ISD::ArgFlagsTy(), *this)) { #ifndef NDEBUG dbgs() << "Call result has unhandled type " - << EVT(VT).getEVTString(); + << EVT(VT).getEVTString() << '\n'; #endif llvm_unreachable(0); } diff --git a/lib/CodeGen/CodeGen.cpp b/lib/CodeGen/CodeGen.cpp index a53f6f8d0f1b..35ec68d00cec 100644 --- a/lib/CodeGen/CodeGen.cpp +++ b/lib/CodeGen/CodeGen.cpp @@ -19,9 +19,9 @@ using namespace llvm; /// initializeCodeGen - Initialize all passes linked into the CodeGen library. void llvm::initializeCodeGen(PassRegistry &Registry) { + initializeBasicTTIPass(Registry); initializeBranchFolderPassPass(Registry); initializeCalculateSpillWeightsPass(Registry); - initializeCodePlacementOptPass(Registry); initializeDeadMachineInstructionElimPass(Registry); initializeEarlyIfConverterPass(Registry); initializeExpandPostRAPass(Registry); diff --git a/lib/CodeGen/CodePlacementOpt.cpp b/lib/CodeGen/CodePlacementOpt.cpp deleted file mode 100644 index d8e06c33a68e..000000000000 --- a/lib/CodeGen/CodePlacementOpt.cpp +++ /dev/null @@ -1,422 +0,0 @@ -//===-- CodePlacementOpt.cpp - Code Placement pass. -----------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the pass that optimizes code placement and aligns loop -// headers to target-specific alignment boundaries. 
-// -//===----------------------------------------------------------------------===// - -#define DEBUG_TYPE "code-placement" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Compiler.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" -using namespace llvm; - -STATISTIC(NumLoopsAligned, "Number of loops aligned"); -STATISTIC(NumIntraElim, "Number of intra loop branches eliminated"); -STATISTIC(NumIntraMoved, "Number of intra loop branches moved"); - -namespace { - class CodePlacementOpt : public MachineFunctionPass { - const MachineLoopInfo *MLI; - const TargetInstrInfo *TII; - const TargetLowering *TLI; - - public: - static char ID; - CodePlacementOpt() : MachineFunctionPass(ID) {} - - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.addRequired(); - AU.addPreservedID(MachineDominatorsID); - MachineFunctionPass::getAnalysisUsage(AU); - } - - private: - bool HasFallthrough(MachineBasicBlock *MBB); - bool HasAnalyzableTerminator(MachineBasicBlock *MBB); - void Splice(MachineFunction &MF, - MachineFunction::iterator InsertPt, - MachineFunction::iterator Begin, - MachineFunction::iterator End); - bool EliminateUnconditionalJumpsToTop(MachineFunction &MF, - MachineLoop *L); - bool MoveDiscontiguousLoopBlocks(MachineFunction &MF, - MachineLoop *L); - bool OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, MachineLoop *L); - bool OptimizeIntraLoopEdges(MachineFunction &MF); - bool AlignLoops(MachineFunction &MF); - bool AlignLoop(MachineFunction &MF, MachineLoop *L, unsigned Align); - }; - - char CodePlacementOpt::ID = 0; -} // end anonymous namespace - -char &llvm::CodePlacementOptID = CodePlacementOpt::ID; -INITIALIZE_PASS(CodePlacementOpt, "code-placement", - "Code Placement Optimizer", false, false) - -/// HasFallthrough - Test whether the given branch has a fallthrough, either as -/// a plain fallthrough or as a fallthrough case of a conditional branch. -/// -bool CodePlacementOpt::HasFallthrough(MachineBasicBlock *MBB) { - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) - return false; - // This conditional branch has no fallthrough. - if (FBB) - return false; - // An unconditional branch has no fallthrough. - if (Cond.empty() && TBB) - return false; - // It has a fallthrough. - return true; -} - -/// HasAnalyzableTerminator - Test whether AnalyzeBranch will succeed on MBB. -/// This is called before major changes are begun to test whether it will be -/// possible to complete the changes. -/// -/// Target-specific code is hereby encouraged to make AnalyzeBranch succeed -/// whenever possible. -/// -bool CodePlacementOpt::HasAnalyzableTerminator(MachineBasicBlock *MBB) { - // Conservatively ignore EH landing pads. - if (MBB->isLandingPad()) return false; - - // Aggressively handle return blocks and similar constructs. - if (MBB->succ_empty()) return true; - - // Ask the target's AnalyzeBranch if it can handle this block. - MachineBasicBlock *TBB = 0, *FBB = 0; - SmallVector Cond; - // Make sure the terminator is understood. - if (TII->AnalyzeBranch(*MBB, TBB, FBB, Cond)) - return false; - // Ignore blocks which look like they might have EH-related control flow. 
- // AnalyzeBranch thinks it knows how to analyze such things, but it doesn't - // recognize the possibility of a control transfer through an unwind. - // Such blocks contain EH_LABEL instructions, however they may be in the - // middle of the block. Instead of searching for them, just check to see - // if the CFG disagrees with AnalyzeBranch. - if (1u + !Cond.empty() != MBB->succ_size()) - return false; - // Make sure we have the option of reversing the condition. - if (!Cond.empty() && TII->ReverseBranchCondition(Cond)) - return false; - return true; -} - -/// Splice - Move the sequence of instructions [Begin,End) to just before -/// InsertPt. Update branch instructions as needed to account for broken -/// fallthrough edges and to take advantage of newly exposed fallthrough -/// opportunities. -/// -void CodePlacementOpt::Splice(MachineFunction &MF, - MachineFunction::iterator InsertPt, - MachineFunction::iterator Begin, - MachineFunction::iterator End) { - assert(Begin != MF.begin() && End != MF.begin() && InsertPt != MF.begin() && - "Splice can't change the entry block!"); - MachineFunction::iterator OldBeginPrior = prior(Begin); - MachineFunction::iterator OldEndPrior = prior(End); - - MF.splice(InsertPt, Begin, End); - - prior(Begin)->updateTerminator(); - OldBeginPrior->updateTerminator(); - OldEndPrior->updateTerminator(); -} - -/// EliminateUnconditionalJumpsToTop - Move blocks which unconditionally jump -/// to the loop top to the top of the loop so that they have a fall through. -/// This can introduce a branch on entry to the loop, but it can eliminate a -/// branch within the loop. See the @simple case in -/// test/CodeGen/X86/loop_blocks.ll for an example of this. -bool CodePlacementOpt::EliminateUnconditionalJumpsToTop(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - MachineBasicBlock *TopMBB = L->getTopBlock(); - - bool BotHasFallthrough = HasFallthrough(L->getBottomBlock()); - - if (TopMBB == MF.begin() || - HasAnalyzableTerminator(prior(MachineFunction::iterator(TopMBB)))) { - new_top: - for (MachineBasicBlock::pred_iterator PI = TopMBB->pred_begin(), - PE = TopMBB->pred_end(); PI != PE; ++PI) { - MachineBasicBlock *Pred = *PI; - if (Pred == TopMBB) continue; - if (HasFallthrough(Pred)) continue; - if (!L->contains(Pred)) continue; - - // Verify that we can analyze all the loop entry edges before beginning - // any changes which will require us to be able to analyze them. - if (Pred == MF.begin()) - continue; - if (!HasAnalyzableTerminator(Pred)) - continue; - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Pred)))) - continue; - - // Move the block. - DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << Pred->getNumber() - << " to top of loop.\n"); - Changed = true; - - // Move it and all the blocks that can reach it via fallthrough edges - // exclusively, to keep existing fallthrough edges intact. - MachineFunction::iterator Begin = Pred; - MachineFunction::iterator End = llvm::next(Begin); - while (Begin != MF.begin()) { - MachineFunction::iterator Prior = prior(Begin); - if (Prior == MF.begin()) - break; - // Stop when a non-fallthrough edge is found. - if (!HasFallthrough(Prior)) - break; - // Stop if a block which could fall-through out of the loop is found. - if (Prior->isSuccessor(End)) - break; - // If we've reached the top, stop scanning. 
- if (Prior == MachineFunction::iterator(TopMBB)) { - // We know top currently has a fall through (because we just checked - // it) which would be lost if we do the transformation, so it isn't - // worthwhile to do the transformation unless it would expose a new - // fallthrough edge. - if (!Prior->isSuccessor(End)) - goto next_pred; - // Otherwise we can stop scanning and proceed to move the blocks. - break; - } - // If we hit a switch or something complicated, don't move anything - // for this predecessor. - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(Prior)))) - break; - // Ok, the block prior to Begin will be moved along with the rest. - // Extend the range to include it. - Begin = Prior; - ++NumIntraMoved; - } - - // Move the blocks. - Splice(MF, TopMBB, Begin, End); - - // Update TopMBB. - TopMBB = L->getTopBlock(); - - // We have a new loop top. Iterate on it. We shouldn't have to do this - // too many times if BranchFolding has done a reasonable job. - goto new_top; - next_pred:; - } - } - - // If the loop previously didn't exit with a fall-through and it now does, - // we eliminated a branch. - if (Changed && - !BotHasFallthrough && - HasFallthrough(L->getBottomBlock())) { - ++NumIntraElim; - } - - return Changed; -} - -/// MoveDiscontiguousLoopBlocks - Move any loop blocks that are not in the -/// portion of the loop contiguous with the header. This usually makes the loop -/// contiguous, provided that AnalyzeBranch can handle all the relevant -/// branching. See the @cfg_islands case in test/CodeGen/X86/loop_blocks.ll -/// for an example of this. -bool CodePlacementOpt::MoveDiscontiguousLoopBlocks(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - MachineBasicBlock *TopMBB = L->getTopBlock(); - MachineBasicBlock *BotMBB = L->getBottomBlock(); - - // Determine a position to move orphaned loop blocks to. If TopMBB is not - // entered via fallthrough and BotMBB is exited via fallthrough, prepend them - // to the top of the loop to avoid losing that fallthrough. Otherwise append - // them to the bottom, even if it previously had a fallthrough, on the theory - // that it's worth an extra branch to keep the loop contiguous. - MachineFunction::iterator InsertPt = - llvm::next(MachineFunction::iterator(BotMBB)); - bool InsertAtTop = false; - if (TopMBB != MF.begin() && - !HasFallthrough(prior(MachineFunction::iterator(TopMBB))) && - HasFallthrough(BotMBB)) { - InsertPt = TopMBB; - InsertAtTop = true; - } - - // Keep a record of which blocks are in the portion of the loop contiguous - // with the loop header. - SmallPtrSet ContiguousBlocks; - for (MachineFunction::iterator I = TopMBB, - E = llvm::next(MachineFunction::iterator(BotMBB)); I != E; ++I) - ContiguousBlocks.insert(I); - - // Find non-contigous blocks and fix them. - if (InsertPt != MF.begin() && HasAnalyzableTerminator(prior(InsertPt))) - for (MachineLoop::block_iterator BI = L->block_begin(), BE = L->block_end(); - BI != BE; ++BI) { - MachineBasicBlock *BB = *BI; - - // Verify that we can analyze all the loop entry edges before beginning - // any changes which will require us to be able to analyze them. - if (!HasAnalyzableTerminator(BB)) - continue; - if (!HasAnalyzableTerminator(prior(MachineFunction::iterator(BB)))) - continue; - - // If the layout predecessor is part of the loop, this block will be - // processed along with it. This keeps them in their relative order. 
- if (BB != MF.begin() && - L->contains(prior(MachineFunction::iterator(BB)))) - continue; - - // Check to see if this block is already contiguous with the main - // portion of the loop. - if (!ContiguousBlocks.insert(BB)) - continue; - - // Move the block. - DEBUG(dbgs() << "CGP: Moving blocks starting at BB#" << BB->getNumber() - << " to be contiguous with loop.\n"); - Changed = true; - - // Process this block and all loop blocks contiguous with it, to keep - // them in their relative order. - MachineFunction::iterator Begin = BB; - MachineFunction::iterator End = llvm::next(MachineFunction::iterator(BB)); - for (; End != MF.end(); ++End) { - if (!L->contains(End)) break; - if (!HasAnalyzableTerminator(End)) break; - ContiguousBlocks.insert(End); - ++NumIntraMoved; - } - - // If we're inserting at the bottom of the loop, and the code we're - // moving originally had fall-through successors, bring the sucessors - // up with the loop blocks to preserve the fall-through edges. - if (!InsertAtTop) - for (; End != MF.end(); ++End) { - if (L->contains(End)) break; - if (!HasAnalyzableTerminator(End)) break; - if (!HasFallthrough(prior(End))) break; - } - - // Move the blocks. This may invalidate TopMBB and/or BotMBB, but - // we don't need them anymore at this point. - Splice(MF, InsertPt, Begin, End); - } - - return Changed; -} - -/// OptimizeIntraLoopEdgesInLoopNest - Reposition loop blocks to minimize -/// intra-loop branching and to form contiguous loops. -/// -/// This code takes the approach of making minor changes to the existing -/// layout to fix specific loop-oriented problems. Also, it depends on -/// AnalyzeBranch, which can't understand complex control instructions. -/// -bool CodePlacementOpt::OptimizeIntraLoopEdgesInLoopNest(MachineFunction &MF, - MachineLoop *L) { - bool Changed = false; - - // Do optimization for nested loops. - for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); - - // Do optimization for this loop. - Changed |= EliminateUnconditionalJumpsToTop(MF, L); - Changed |= MoveDiscontiguousLoopBlocks(MF, L); - - return Changed; -} - -/// OptimizeIntraLoopEdges - Reposition loop blocks to minimize -/// intra-loop branching and to form contiguous loops. -/// -bool CodePlacementOpt::OptimizeIntraLoopEdges(MachineFunction &MF) { - bool Changed = false; - - if (!TLI->shouldOptimizeCodePlacement()) - return Changed; - - // Do optimization for each loop in the function. - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); - I != E; ++I) - if (!(*I)->getParentLoop()) - Changed |= OptimizeIntraLoopEdgesInLoopNest(MF, *I); - - return Changed; -} - -/// AlignLoops - Align loop headers to target preferred alignments. -/// -bool CodePlacementOpt::AlignLoops(MachineFunction &MF) { - const Function *F = MF.getFunction(); - if (F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) - return false; - - unsigned Align = TLI->getPrefLoopAlignment(); - if (!Align) - return false; // Don't care about loop alignment. - - bool Changed = false; - - for (MachineLoopInfo::iterator I = MLI->begin(), E = MLI->end(); - I != E; ++I) - Changed |= AlignLoop(MF, *I, Align); - - return Changed; -} - -/// AlignLoop - Align loop headers to target preferred alignments. -/// -bool CodePlacementOpt::AlignLoop(MachineFunction &MF, MachineLoop *L, - unsigned Align) { - bool Changed = false; - - // Do alignment for nested loops. 
- for (MachineLoop::iterator I = L->begin(), E = L->end(); I != E; ++I) - Changed |= AlignLoop(MF, *I, Align); - - L->getTopBlock()->setAlignment(Align); - Changed = true; - ++NumLoopsAligned; - - return Changed; -} - -bool CodePlacementOpt::runOnMachineFunction(MachineFunction &MF) { - MLI = &getAnalysis(); - if (MLI->empty()) - return false; // No loops. - - TLI = MF.getTarget().getTargetLowering(); - TII = MF.getTarget().getInstrInfo(); - - bool Changed = OptimizeIntraLoopEdges(MF); - - Changed |= AlignLoops(MF); - - return Changed; -} diff --git a/lib/CodeGen/CriticalAntiDepBreaker.cpp b/lib/CodeGen/CriticalAntiDepBreaker.cpp index 377b4712beac..0eb74a40d589 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.cpp +++ b/lib/CodeGen/CriticalAntiDepBreaker.cpp @@ -17,12 +17,12 @@ #include "CriticalAntiDepBreaker.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -57,23 +57,7 @@ void CriticalAntiDepBreaker::StartBlock(MachineBasicBlock *BB) { bool IsReturnBlock = (BBSize != 0 && BB->back().isReturn()); - // Determine the live-out physregs for this block. - if (IsReturnBlock) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { - for (MCRegAliasIterator AI(*I, TRI, true); AI.isValid(); ++AI) { - unsigned Reg = *AI; - Classes[Reg] = reinterpret_cast(-1); - KillIndices[Reg] = BBSize; - DefIndices[Reg] = ~0u; - } - } - } - - // In a non-return block, examine the live-in regs of all successors. - // Note a return block can have successors if the return instruction is - // predicated. + // Examine the live-in regs of all successors. for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), @@ -371,14 +355,15 @@ CriticalAntiDepBreaker::isNewRegClobberedByRefs(RegRefIter RegRefBegin, return false; } -unsigned -CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, - RegRefIter RegRefEnd, - unsigned AntiDepReg, - unsigned LastNewReg, - const TargetRegisterClass *RC) +unsigned CriticalAntiDepBreaker:: +findSuitableFreeRegister(RegRefIter RegRefBegin, + RegRefIter RegRefEnd, + unsigned AntiDepReg, + unsigned LastNewReg, + const TargetRegisterClass *RC, + SmallVector &Forbid) { - ArrayRef Order = RegClassInfo.getOrder(RC); + ArrayRef Order = RegClassInfo.getOrder(RC); for (unsigned i = 0; i != Order.size(); ++i) { unsigned NewReg = Order[i]; // Don't replace a register with itself. @@ -401,6 +386,15 @@ CriticalAntiDepBreaker::findSuitableFreeRegister(RegRefIter RegRefBegin, Classes[NewReg] == reinterpret_cast(-1) || KillIndices[AntiDepReg] > DefIndices[NewReg]) continue; + // If NewReg overlaps any of the forbidden registers, we can't use it. 
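The Forbid parameter threaded through findSuitableFreeRegister() in this hunk keeps the anti-dependence breaker from renaming onto a register that aliases another register defined by the same instruction. As a rough illustration only (Register, overlaps() and the free list below are simplified stand-ins, not LLVM's MCPhysReg / TargetRegisterInfo::regsOverlap() / RegisterClassInfo API), the selection loop amounts to:

#include <cstdint>
#include <iostream>
#include <vector>

// Simplified stand-in for a physical register: an id plus the ids it aliases.
struct Register {
  unsigned Id;
  std::vector<unsigned> Aliases; // registers this one overlaps with
};

// Stand-in for TargetRegisterInfo::regsOverlap().
static bool overlaps(const Register &A, unsigned B) {
  if (A.Id == B)
    return true;
  for (unsigned Alias : A.Aliases)
    if (Alias == B)
      return true;
  return false;
}

// Pick the first free register in allocation order that does not overlap
// any register in Forbid (the other defs of the same instruction).
static int findSuitableFreeRegister(const std::vector<Register> &Order,
                                    const std::vector<bool> &IsFree,
                                    const std::vector<unsigned> &Forbid) {
  for (const Register &R : Order) {
    if (!IsFree[R.Id])
      continue;
    bool Forbidden = false;
    for (unsigned F : Forbid)
      if (overlaps(R, F)) { Forbidden = true; break; }
    if (!Forbidden)
      return static_cast<int>(R.Id);
  }
  return -1; // no suitable register
}

int main() {
  // R0 aliases R1 (think of a sub/super-register pair); R2 is independent.
  std::vector<Register> Order = {{0, {1}}, {1, {0}}, {2, {}}};
  std::vector<bool> IsFree = {true, true, true};
  std::vector<unsigned> Forbid = {1}; // the instruction also defines R1
  std::cout << findSuitableFreeRegister(Order, IsFree, Forbid) << "\n"; // 2
}

This mirrors the hunk: each candidate is tested against the (typically short) Forbid list inside the loop rather than the allocation order being pruned up front.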
+ bool Forbidden = false; + for (SmallVector::iterator it = Forbid.begin(), + ite = Forbid.end(); it != ite; ++it) + if (TRI->regsOverlap(NewReg, *it)) { + Forbidden = true; + break; + } + if (Forbidden) continue; return NewReg; } @@ -564,6 +558,8 @@ BreakAntiDependencies(const std::vector& SUnits, PrescanInstruction(MI); + SmallVector ForbidRegs; + // If MI's defs have a special allocation requirement, don't allow // any def registers to be changed. Also assume all registers // defined in a call must not be changed (ABI). @@ -574,7 +570,9 @@ BreakAntiDependencies(const std::vector& SUnits, AntiDepReg = 0; else if (AntiDepReg) { // If this instruction has a use of AntiDepReg, breaking it - // is invalid. + // is invalid. If the instruction defines other registers, + // save a list of them so that we don't pick a new register + // that overlaps any of them. for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { MachineOperand &MO = MI->getOperand(i); if (!MO.isReg()) continue; @@ -584,6 +582,8 @@ BreakAntiDependencies(const std::vector& SUnits, AntiDepReg = 0; break; } + if (MO.isDef() && Reg != AntiDepReg) + ForbidRegs.push_back(Reg); } } @@ -606,7 +606,7 @@ BreakAntiDependencies(const std::vector& SUnits, if (unsigned NewReg = findSuitableFreeRegister(Range.first, Range.second, AntiDepReg, LastNewReg[AntiDepReg], - RC)) { + RC, ForbidRegs)) { DEBUG(dbgs() << "Breaking anti-dependence edge on " << TRI->getName(AntiDepReg) << " with " << RegRefs.count(AntiDepReg) << " references" diff --git a/lib/CodeGen/CriticalAntiDepBreaker.h b/lib/CodeGen/CriticalAntiDepBreaker.h index ad95c4819119..df13dd31f6b2 100644 --- a/lib/CodeGen/CriticalAntiDepBreaker.h +++ b/lib/CodeGen/CriticalAntiDepBreaker.h @@ -17,13 +17,13 @@ #define LLVM_CODEGEN_CRITICALANTIDEPBREAKER_H #include "AntiDepBreaker.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/ADT/BitVector.h" #include namespace llvm { @@ -102,7 +102,8 @@ class TargetRegisterInfo; RegRefIter RegRefEnd, unsigned AntiDepReg, unsigned LastNewReg, - const TargetRegisterClass *RC); + const TargetRegisterClass *RC, + SmallVector &Forbid); }; } diff --git a/lib/CodeGen/DFAPacketizer.cpp b/lib/CodeGen/DFAPacketizer.cpp index ff2f11353afd..840a10128daf 100644 --- a/lib/CodeGen/DFAPacketizer.cpp +++ b/lib/CodeGen/DFAPacketizer.cpp @@ -23,12 +23,12 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/DFAPacketizer.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; DFAPacketizer::DFAPacketizer(const InstrItineraryData *I, const int (*SIT)[2], diff --git a/lib/CodeGen/DeadMachineInstructionElim.cpp b/lib/CodeGen/DeadMachineInstructionElim.cpp index 8964269dde5f..a54217f5b2fb 100644 --- a/lib/CodeGen/DeadMachineInstructionElim.cpp +++ b/lib/CodeGen/DeadMachineInstructionElim.cpp @@ -13,14 +13,14 @@ #define DEBUG_TYPE "codegen-dce" #include "llvm/CodeGen/Passes.h" -#include "llvm/Pass.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include 
"llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumDeletes, "Number of dead instructions deleted"); @@ -99,15 +99,6 @@ bool DeadMachineInstructionElim::runOnMachineFunction(MachineFunction &MF) { // Start out assuming that reserved registers are live out of this block. LivePhysRegs = MRI->getReservedRegs(); - // Also add any explicit live-out physregs for this block. - if (!MBB->empty() && MBB->back().isReturn()) - for (MachineRegisterInfo::liveout_iterator LOI = MRI->liveout_begin(), - LOE = MRI->liveout_end(); LOI != LOE; ++LOI) { - unsigned Reg = *LOI; - if (TargetRegisterInfo::isPhysicalRegister(Reg)) - LivePhysRegs.set(Reg); - } - // Add live-ins from sucessors to LivePhysRegs. Normally, physregs are not // live across blocks, but some targets (x86) can have flags live out of a // block. diff --git a/lib/CodeGen/DwarfEHPrepare.cpp b/lib/CodeGen/DwarfEHPrepare.cpp index 709562438ce2..f27ec770ebad 100644 --- a/lib/CodeGen/DwarfEHPrepare.cpp +++ b/lib/CodeGen/DwarfEHPrepare.cpp @@ -13,15 +13,15 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dwarfehprepare" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCAsmInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/CallSite.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -33,7 +33,7 @@ STATISTIC(NumResumesLowered, "Number of resume calls lowered"); namespace { class DwarfEHPrepare : public FunctionPass { const TargetMachine *TM; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; // RewindFunction - _Unwind_Resume or the target equivalent. 
Constant *RewindFunction; diff --git a/lib/CodeGen/EarlyIfConversion.cpp b/lib/CodeGen/EarlyIfConversion.cpp index d5d84041b69f..5447df09cbb2 100644 --- a/lib/CodeGen/EarlyIfConversion.cpp +++ b/lib/CodeGen/EarlyIfConversion.cpp @@ -17,7 +17,6 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "early-ifcvt" -#include "MachineTraceMetrics.h" #include "llvm/ADT/BitVector.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" @@ -30,13 +29,14 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; @@ -459,7 +459,6 @@ void SSAIfConv::replacePHIInstrs() { for (unsigned i = 0, e = PHIs.size(); i != e; ++i) { PHIInfo &PI = PHIs[i]; DEBUG(dbgs() << "If-converting " << *PI.PHI); - assert(PI.PHI->getNumOperands() == 5 && "Unexpected PHI operands."); unsigned DstReg = PI.PHI->getOperand(0).getReg(); TII->insertSelect(*Head, FirstTerm, HeadDL, DstReg, Cond, PI.TReg, PI.FReg); DEBUG(dbgs() << " --> " << *llvm::prior(FirstTerm)); @@ -593,6 +592,7 @@ public: EarlyIfConverter() : MachineFunctionPass(ID) {} void getAnalysisUsage(AnalysisUsage &AU) const; bool runOnMachineFunction(MachineFunction &MF); + const char *getPassName() const { return "Early If-Conversion"; } private: bool tryConvertIf(MachineBasicBlock*); diff --git a/lib/CodeGen/ErlangGC.cpp b/lib/CodeGen/ErlangGC.cpp new file mode 100644 index 000000000000..8a1e2d9c99a8 --- /dev/null +++ b/lib/CodeGen/ErlangGC.cpp @@ -0,0 +1,81 @@ +//===-- ErlangGC.cpp - Erlang/OTP GC strategy -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the Erlang/OTP runtime-compatible garbage collector +// (e.g. defines safe points, root initialization etc.) +// +// The frametable emitter is in ErlangGCPrinter.cpp. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/CodeGen/GCs.h" +#include "llvm/CodeGen/GCStrategy.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/MC/MCContext.h" +#include "llvm/MC/MCSymbol.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" + +using namespace llvm; + +namespace { + + class ErlangGC : public GCStrategy { + MCSymbol *InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const; + public: + ErlangGC(); + bool findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF); + }; + +} + +static GCRegistry::Add +X("erlang", "erlang-compatible garbage collector"); + +void llvm::linkErlangGC() { } + +ErlangGC::ErlangGC() { + InitRoots = false; + NeededSafePoints = 1 << GC::PostCall; + UsesMetadata = true; + CustomRoots = false; + CustomSafePoints = true; +} + +MCSymbol *ErlangGC::InsertLabel(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI, + DebugLoc DL) const { + const TargetInstrInfo* TII = MBB.getParent()->getTarget().getInstrInfo(); + MCSymbol *Label = MBB.getParent()->getContext().CreateTempSymbol(); + BuildMI(MBB, MI, DL, TII->get(TargetOpcode::GC_LABEL)).addSym(Label); + return Label; +} + +bool ErlangGC::findCustomSafePoints(GCFunctionInfo &FI, MachineFunction &MF) { + for (MachineFunction::iterator BBI = MF.begin(), BBE = MF.end(); BBI != BBE; + ++BBI) + for (MachineBasicBlock::iterator MI = BBI->begin(), ME = BBI->end(); + MI != ME; ++MI) + + if (MI->getDesc().isCall()) { + + // Do not treat tail call sites as safe points. + if (MI->getDesc().isTerminator()) + continue; + + /* Code copied from VisitCallPoint(...) */ + MachineBasicBlock::iterator RAI = MI; ++RAI; + MCSymbol* Label = InsertLabel(*MI->getParent(), RAI, MI->getDebugLoc()); + FI.addSafePoint(GC::PostCall, Label, MI->getDebugLoc()); + } + + return false; +} diff --git a/lib/CodeGen/ExecutionDepsFix.cpp b/lib/CodeGen/ExecutionDepsFix.cpp index ed78f1942150..9b0e76fa20cb 100644 --- a/lib/CodeGen/ExecutionDepsFix.cpp +++ b/lib/CodeGen/ExecutionDepsFix.cpp @@ -21,15 +21,15 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "execution-fix" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; /// A DomainValue is a bit like LiveIntervals' ValNo, but it also keeps track diff --git a/lib/CodeGen/ExpandISelPseudos.cpp b/lib/CodeGen/ExpandISelPseudos.cpp index 2c4a93543cc3..b2b68828a226 100644 --- a/lib/CodeGen/ExpandISelPseudos.cpp +++ b/lib/CodeGen/ExpandISelPseudos.cpp @@ -15,12 +15,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "expand-isel-pseudos" +#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/Debug.h" using namespace llvm; 
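For the ErlangGC strategy introduced a few hunks above, the interesting part is findCustomSafePoints(): every call that is not also a terminator (i.e. not a tail call) gets a GC_LABEL planted immediately after it, and that label is recorded as a GC::PostCall safe point. A stripped-down model of that scan, using a made-up Instr record instead of MachineInstr/MCSymbol, looks like:

#include <iostream>
#include <string>
#include <vector>

// Simplified instruction model: just the two flags the scan cares about.
struct Instr {
  std::string Name;
  bool IsCall;
  bool IsTerminator; // a call that is also a terminator is a tail call
};

// Record a post-call safe point after every call that is not a tail call,
// in the spirit of ErlangGC::findCustomSafePoints().
static std::vector<size_t> findPostCallSafePoints(const std::vector<Instr> &Block) {
  std::vector<size_t> SafePoints;
  for (size_t I = 0; I < Block.size(); ++I) {
    if (!Block[I].IsCall)
      continue;
    if (Block[I].IsTerminator) // do not treat tail call sites as safe points
      continue;
    SafePoints.push_back(I + 1); // the label goes right after the call
  }
  return SafePoints;
}

int main() {
  std::vector<Instr> Block = {
      {"mov", false, false},
      {"call foo", true, false},   // ordinary call: safe point after it
      {"add", false, false},
      {"tailcall bar", true, true} // tail call: skipped
  };
  for (size_t P : findPostCallSafePoints(Block))
    std::cout << "GC label inserted at index " << P << "\n";
}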
namespace { diff --git a/lib/CodeGen/ExpandPostRAPseudos.cpp b/lib/CodeGen/ExpandPostRAPseudos.cpp index ffe4b63c1b11..1611db8d91a3 100644 --- a/lib/CodeGen/ExpandPostRAPseudos.cpp +++ b/lib/CodeGen/ExpandPostRAPseudos.cpp @@ -18,11 +18,11 @@ #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace { @@ -49,8 +49,6 @@ private: bool LowerSubregToReg(MachineInstr *MI); bool LowerCopy(MachineInstr *MI); - void TransferDeadFlag(MachineInstr *MI, unsigned DstReg, - const TargetRegisterInfo *TRI); void TransferImplicitDefs(MachineInstr *MI); }; } // end anonymous namespace @@ -61,21 +59,6 @@ char &llvm::ExpandPostRAPseudosID = ExpandPostRA::ID; INITIALIZE_PASS(ExpandPostRA, "postrapseudos", "Post-RA pseudo instruction expansion pass", false, false) -/// TransferDeadFlag - MI is a pseudo-instruction with DstReg dead, -/// and the lowered replacement instructions immediately precede it. -/// Mark the replacement instructions with the dead flag. -void -ExpandPostRA::TransferDeadFlag(MachineInstr *MI, unsigned DstReg, - const TargetRegisterInfo *TRI) { - for (MachineBasicBlock::iterator MII = - prior(MachineBasicBlock::iterator(MI)); ; --MII) { - if (MII->addRegisterDead(DstReg, TRI)) - break; - assert(MII != MI->getParent()->begin() && - "copyPhysReg output doesn't reference destination register!"); - } -} - /// TransferImplicitDefs - MI is a pseudo-instruction, and the lowered /// replacement instructions immediately precede it. Copy any implicit-def /// operands from MI to the replacement instruction. @@ -114,6 +97,12 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { DEBUG(dbgs() << "subreg: CONVERTING: " << *MI); + if (MI->allDefsAreDead()) { + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "subreg: replaced by: " << *MI); + return true; + } + if (DstSubReg == InsReg) { // No need to insert an identify copy instruction. // Watch out for case like this: @@ -135,10 +124,6 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { MachineBasicBlock::iterator CopyMI = MI; --CopyMI; CopyMI->addRegisterDefined(DstReg); - - // Transfer the kill/dead flags, if needed. - if (MI->getOperand(0).isDead()) - TransferDeadFlag(MI, DstSubReg, TRI); DEBUG(dbgs() << "subreg: " << *CopyMI); } @@ -148,6 +133,14 @@ bool ExpandPostRA::LowerSubregToReg(MachineInstr *MI) { } bool ExpandPostRA::LowerCopy(MachineInstr *MI) { + + if (MI->allDefsAreDead()) { + DEBUG(dbgs() << "dead copy: " << *MI); + MI->setDesc(TII->get(TargetOpcode::KILL)); + DEBUG(dbgs() << "replaced by: " << *MI); + return true; + } + MachineOperand &DstMO = MI->getOperand(0); MachineOperand &SrcMO = MI->getOperand(1); @@ -155,7 +148,7 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { DEBUG(dbgs() << "identity copy: " << *MI); // No need to insert an identity copy instruction, but replace with a KILL // if liveness is changed. - if (DstMO.isDead() || SrcMO.isUndef() || MI->getNumOperands() > 2) { + if (SrcMO.isUndef() || MI->getNumOperands() > 2) { // We must make sure the super-register gets killed. Replace the // instruction with KILL. 
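The ExpandPostRAPseudos changes above drop the old TransferDeadFlag() bookkeeping in favour of a simpler rule: if every def of the copy-like pseudo handled by LowerCopy()/LowerSubregToReg() is already dead, the pseudo is not expanded at all, just retagged as a KILL. A toy version of that decision (Operand and Instr are illustrative structs, and "real-copy" stands in for the TII->copyPhysReg() expansion) is:

#include <iostream>
#include <string>
#include <vector>

// Simplified operand/instruction model for illustration only.
struct Operand {
  bool IsDef;
  bool IsDead; // a def whose value is never read
};

struct Instr {
  std::string Opcode;
  std::vector<Operand> Ops;

  bool allDefsAreDead() const {
    for (const Operand &O : Ops)
      if (O.IsDef && !O.IsDead)
        return false;
    return true;
  }
};

// If all defs are dead there is nothing worth copying; turn the pseudo into
// a KILL so later passes still see the liveness it implied.
static void lowerCopy(Instr &MI) {
  if (MI.allDefsAreDead()) {
    MI.Opcode = "KILL";
    return;
  }
  MI.Opcode = "real-copy"; // placeholder for the actual copyPhysReg expansion
}

int main() {
  Instr DeadCopy{"COPY", {{true, true}, {false, false}}};
  lowerCopy(DeadCopy);
  std::cout << DeadCopy.Opcode << "\n"; // prints KILL
}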
MI->setDesc(TII->get(TargetOpcode::KILL)); @@ -171,8 +164,6 @@ bool ExpandPostRA::LowerCopy(MachineInstr *MI) { TII->copyPhysReg(*MI->getParent(), MI, MI->getDebugLoc(), DstMO.getReg(), SrcMO.getReg(), SrcMO.isKill()); - if (DstMO.isDead()) - TransferDeadFlag(MI, DstMO.getReg(), TRI); if (MI->getNumOperands() > 2) TransferImplicitDefs(MI); DEBUG({ diff --git a/lib/CodeGen/GCMetadata.cpp b/lib/CodeGen/GCMetadata.cpp index 1caf8c233976..ef5247c2edff 100644 --- a/lib/CodeGen/GCMetadata.cpp +++ b/lib/CodeGen/GCMetadata.cpp @@ -14,10 +14,10 @@ #include "llvm/CodeGen/GCMetadata.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Pass.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -33,25 +33,13 @@ namespace { explicit Printer(raw_ostream &OS) : FunctionPass(ID), OS(OS) {} - const char *getPassName() const; - void getAnalysisUsage(AnalysisUsage &AU) const; - - bool runOnFunction(Function &F); - }; - - class Deleter : public FunctionPass { - static char ID; - - public: - Deleter(); - const char *getPassName() const; void getAnalysisUsage(AnalysisUsage &AU) const; bool runOnFunction(Function &F); bool doFinalization(Module &M); }; - + } INITIALIZE_PASS(GCModuleInfo, "collector-metadata", @@ -182,32 +170,9 @@ bool Printer::runOnFunction(Function &F) { return false; } -// ----------------------------------------------------------------------------- - -char Deleter::ID = 0; - -FunctionPass *llvm::createGCInfoDeleter() { - return new Deleter(); -} - -Deleter::Deleter() : FunctionPass(ID) {} - -const char *Deleter::getPassName() const { - return "Delete Garbage Collector Information"; -} - -void Deleter::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequired(); -} - -bool Deleter::runOnFunction(Function &MF) { - return false; -} - -bool Deleter::doFinalization(Module &M) { +bool Printer::doFinalization(Module &M) { GCModuleInfo *GMI = getAnalysisIfAvailable(); - assert(GMI && "Deleter didn't require GCModuleInfo?!"); + assert(GMI && "Printer didn't require GCModuleInfo?!"); GMI->clear(); return false; } diff --git a/lib/CodeGen/GCStrategy.cpp b/lib/CodeGen/GCStrategy.cpp index f4755bb1635c..1173d1102125 100644 --- a/lib/CodeGen/GCStrategy.cpp +++ b/lib/CodeGen/GCStrategy.cpp @@ -16,22 +16,22 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/Passes.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" -#include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/DominatorInternals.h" +#include "llvm/Analysis/Dominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git 
a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index 31e36f0168cb..9958d7daada8 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -12,24 +12,25 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "ifcvt" -#include "BranchFolding.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "BranchFolding.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; // Hidden options for help debugging. @@ -150,7 +151,7 @@ namespace { /// basic block number. std::vector BBAnalysis; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; const TargetInstrInfo *TII; const TargetRegisterInfo *TRI; const InstrItineraryData *InstrItins; @@ -994,14 +995,13 @@ static void UpdatePredRedefs(MachineInstr *MI, SmallSet &Redefs, Redefs.erase(*SubRegs); } } + MachineInstrBuilder MIB(*MI->getParent()->getParent(), MI); for (unsigned i = 0, e = Defs.size(); i != e; ++i) { unsigned Reg = Defs[i]; if (!Redefs.insert(Reg)) { if (AddImpUse) // Treat predicated update as read + write. 
- MI->addOperand(MachineOperand::CreateReg(Reg, false/*IsDef*/, - true/*IsImp*/,false/*IsKill*/, - false/*IsDead*/,true/*IsUndef*/)); + MIB.addReg(Reg, RegState::Implicit | RegState::Undef); } else { for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) Redefs.insert(*SubRegs); @@ -1557,7 +1557,7 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { if (Succ == FallThrough) continue; FromBBI.BB->removeSuccessor(Succ); - if (AddEdges) + if (AddEdges && !ToBBI.BB->isSuccessor(Succ)) ToBBI.BB->addSuccessor(Succ); } diff --git a/lib/CodeGen/InlineSpiller.cpp b/lib/CodeGen/InlineSpiller.cpp index 37828a70b56f..c6d1a18dbd06 100644 --- a/lib/CodeGen/InlineSpiller.cpp +++ b/lib/CodeGen/InlineSpiller.cpp @@ -14,7 +14,6 @@ #define DEBUG_TYPE "regalloc" #include "Spiller.h" -#include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" @@ -22,16 +21,17 @@ #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/CodeGen/InterferenceCache.cpp b/lib/CodeGen/InterferenceCache.cpp index 1541bf0c8512..a8e711e33bdf 100644 --- a/lib/CodeGen/InterferenceCache.cpp +++ b/lib/CodeGen/InterferenceCache.cpp @@ -13,9 +13,9 @@ #define DEBUG_TYPE "regalloc" #include "InterferenceCache.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/InterferenceCache.h b/lib/CodeGen/InterferenceCache.h index 3c928a50864b..c02fb9a1ee24 100644 --- a/lib/CodeGen/InterferenceCache.h +++ b/lib/CodeGen/InterferenceCache.h @@ -15,7 +15,7 @@ #ifndef LLVM_CODEGEN_INTERFERENCECACHE #define LLVM_CODEGEN_INTERFERENCECACHE -#include "LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" namespace llvm { diff --git a/lib/CodeGen/IntrinsicLowering.cpp b/lib/CodeGen/IntrinsicLowering.cpp index 6120ae56b4a7..07f0ccf52f8c 100644 --- a/lib/CodeGen/IntrinsicLowering.cpp +++ b/lib/CodeGen/IntrinsicLowering.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/IntrinsicLowering.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IRBuilder.h" -#include "llvm/Module.h" -#include "llvm/Type.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" using namespace llvm; template diff --git a/lib/CodeGen/LLVMBuild.txt 
b/lib/CodeGen/LLVMBuild.txt index fee0347ea659..81ef1aa89dd4 100644 --- a/lib/CodeGen/LLVMBuild.txt +++ b/lib/CodeGen/LLVMBuild.txt @@ -22,4 +22,4 @@ subdirectories = AsmPrinter SelectionDAG type = Library name = CodeGen parent = Libraries -required_libraries = Analysis Core MC Scalar Support Target TransformUtils +required_libraries = Analysis Core MC Scalar Support Target TransformUtils ObjCARC diff --git a/lib/CodeGen/LLVMTargetMachine.cpp b/lib/CodeGen/LLVMTargetMachine.cpp index 24daafaa62e1..1a0983783484 100644 --- a/lib/CodeGen/LLVMTargetMachine.cpp +++ b/lib/CodeGen/LLVMTargetMachine.cpp @@ -11,30 +11,30 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Transforms/Scalar.h" -#include "llvm/PassManager.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/ADT/OwningPtr.h" #include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/AsmPrinter.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCInstrInfo.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSubtargetInfo.h" -#include "llvm/ADT/OwningPtr.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" -#include "llvm/Support/FormattedStream.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; // Enable or disable FastISel. Both options are needed, because @@ -79,6 +79,10 @@ LLVMTargetMachine::LLVMTargetMachine(const Target &T, StringRef Triple, "and that InitializeAllTargetMCs() is being invoked!"); } +void LLVMTargetMachine::addAnalysisPasses(PassManagerBase &PM) { + PM.add(createBasicTargetTransformInfoPass(getTargetLowering())); +} + /// addPassesToX helper drives creation and initialization of TargetPassConfig. static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassManagerBase &PM, @@ -96,6 +100,8 @@ static MCContext *addPassesToGenerateCode(LLVMTargetMachine *TM, PassConfig->addIRPasses(); + PassConfig->addCodeGenPrepare(); + PassConfig->addPassesToHandleExceptions(); PassConfig->addISelPrepare(); @@ -191,7 +197,8 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, // emission fails. 
MCCodeEmitter *MCE = getTarget().createMCCodeEmitter(*getInstrInfo(), MRI, STI, *Context); - MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), TargetCPU); + MCAsmBackend *MAB = getTarget().createMCAsmBackend(getTargetTriple(), + TargetCPU); if (MCE == 0 || MAB == 0) return true; @@ -199,7 +206,7 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, *Context, *MAB, Out, MCE, hasMCRelaxAll(), hasMCNoExecStack())); - AsmStreamer.get()->InitSections(); + AsmStreamer.get()->setAutoInitSections(true); break; } case CGFT_Null: @@ -219,7 +226,6 @@ bool LLVMTargetMachine::addPassesToEmitFile(PassManagerBase &PM, PM.add(Printer); - PM.add(createGCInfoDeleter()); return false; } @@ -238,7 +244,6 @@ bool LLVMTargetMachine::addPassesToEmitMachineCode(PassManagerBase &PM, return true; addCodeEmitter(PM, JCE); - PM.add(createGCInfoDeleter()); return false; // success! } diff --git a/lib/CodeGen/LexicalScopes.cpp b/lib/CodeGen/LexicalScopes.cpp index 6b6b9d084e1f..81721541cd89 100644 --- a/lib/CodeGen/LexicalScopes.cpp +++ b/lib/CodeGen/LexicalScopes.cpp @@ -16,10 +16,10 @@ #define DEBUG_TYPE "lexicalscopes" #include "llvm/CodeGen/LexicalScopes.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstr.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/FormattedStream.h" @@ -314,24 +314,22 @@ bool LexicalScopes::dominates(DebugLoc DL, MachineBasicBlock *MBB) { void LexicalScope::anchor() { } /// dump - Print data structures. -void LexicalScope::dump() const { +void LexicalScope::dump(unsigned Indent) const { #ifndef NDEBUG raw_ostream &err = dbgs(); - err.indent(IndentLevel); + err.indent(Indent); err << "DFSIn: " << DFSIn << " DFSOut: " << DFSOut << "\n"; const MDNode *N = Desc; + err.indent(Indent); N->dump(); if (AbstractScope) - err << "Abstract Scope\n"; + err << std::string(Indent, ' ') << "Abstract Scope\n"; - IndentLevel += 2; if (!Children.empty()) - err << "Children ...\n"; + err << std::string(Indent + 2, ' ') << "Children ...\n"; for (unsigned i = 0, e = Children.size(); i != e; ++i) if (Children[i] != this) - Children[i]->dump(); - - IndentLevel -= 2; + Children[i]->dump(Indent + 2); #endif } diff --git a/lib/CodeGen/LiveDebugVariables.cpp b/lib/CodeGen/LiveDebugVariables.cpp index defc1279ec8c..0b117ac6566b 100644 --- a/lib/CodeGen/LiveDebugVariables.cpp +++ b/lib/CodeGen/LiveDebugVariables.cpp @@ -21,11 +21,6 @@ #define DEBUG_TYPE "livedebug" #include "LiveDebugVariables.h" -#include "VirtRegMap.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/Metadata.h" -#include "llvm/Value.h" #include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LexicalScopes.h" @@ -35,6 +30,11 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" @@ -247,10 +247,6 @@ public: LiveIntervals &LIS, MachineDominatorTree &MDT, UserValueScopes &UVS); - /// renameRegister - Update locations to rewrite OldReg as NewReg:SubIdx. 
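A small point worth calling out from the LexicalScopes.cpp hunk above: dump() now takes the indentation as an explicit parameter instead of mutating a shared IndentLevel member, so the recursion carries its own state and the method stays const. A self-contained sketch of the same pattern (Scope is a hypothetical stand-in for LexicalScope, not the real class):

#include <iostream>
#include <string>
#include <vector>

// A scope tree node, loosely modelled on LexicalScope for illustration.
struct Scope {
  std::string Name;
  std::vector<Scope *> Children;

  // Passing the indentation down as a parameter keeps dump() const and
  // safe to call from anywhere, which is what the hunk above switches to.
  void dump(unsigned Indent = 0) const {
    std::cout << std::string(Indent, ' ') << Name << "\n";
    if (!Children.empty())
      std::cout << std::string(Indent + 2, ' ') << "Children ...\n";
    for (const Scope *C : Children)
      if (C != this) // same self-reference guard as the original
        C->dump(Indent + 2);
  }
};

int main() {
  Scope Leaf{"inlined scope", {}};
  Scope Root{"function scope", {&Leaf}};
  Root.dump();
}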
- void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, - const TargetRegisterInfo *TRI); - /// splitRegister - Replace OldReg ranges with NewRegs ranges where NewRegs is /// live. Returns true if any changes were made. bool splitRegister(unsigned OldLocNo, ArrayRef NewRegs); @@ -259,7 +255,7 @@ public: /// provided virtual register map. void rewriteLocations(VirtRegMap &VRM, const TargetRegisterInfo &TRI); - /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. + /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. void emitDebugValues(VirtRegMap *VRM, LiveIntervals &LIS, const TargetInstrInfo &TRI); @@ -286,6 +282,11 @@ class LDVImpl { MachineDominatorTree *MDT; const TargetRegisterInfo *TRI; + /// Whether emitDebugValues is called. + bool EmitDone; + /// Whether the machine function is modified during the pass. + bool ModifiedMF; + /// userValues - All allocated UserValue instances. SmallVector userValues; @@ -320,27 +321,30 @@ class LDVImpl { void computeIntervals(); public: - LDVImpl(LiveDebugVariables *ps) : pass(*ps) {} + LDVImpl(LiveDebugVariables *ps) : pass(*ps), EmitDone(false), + ModifiedMF(false) {} bool runOnMachineFunction(MachineFunction &mf); - /// clear - Relase all memory. + /// clear - Release all memory. void clear() { DeleteContainerPointers(userValues); userValues.clear(); virtRegToEqClass.clear(); userVarMap.clear(); + // Make sure we call emitDebugValues if the machine function was modified. + assert((!ModifiedMF || EmitDone) && + "Dbg values are not emitted in LDV"); + EmitDone = false; + ModifiedMF = false; } /// mapVirtReg - Map virtual register to an equivalence class. void mapVirtReg(unsigned VirtReg, UserValue *EC); - /// renameRegister - Replace all references to OldReg with NewReg:SubIdx. - void renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx); - /// splitRegister - Replace all references to OldReg with NewRegs. void splitRegister(unsigned OldReg, ArrayRef NewRegs); - /// emitDebugVariables - Recreate DBG_VALUE instruction from data structures. + /// emitDebugValues - Recreate DBG_VALUE instruction from data structures. 
void emitDebugValues(VirtRegMap *VRM); void print(raw_ostream&); @@ -693,6 +697,7 @@ bool LDVImpl::runOnMachineFunction(MachineFunction &mf) { computeIntervals(); DEBUG(print(dbgs())); LS.releaseMemory(); + ModifiedMF = Changed; return Changed; } @@ -714,45 +719,6 @@ LiveDebugVariables::~LiveDebugVariables() { delete static_cast(pImpl); } -void UserValue:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx, - const TargetRegisterInfo *TRI) { - for (unsigned i = locations.size(); i; --i) { - unsigned LocNo = i - 1; - MachineOperand &Loc = locations[LocNo]; - if (!Loc.isReg() || Loc.getReg() != OldReg) - continue; - if (TargetRegisterInfo::isPhysicalRegister(NewReg)) - Loc.substPhysReg(NewReg, *TRI); - else - Loc.substVirtReg(NewReg, SubIdx, *TRI); - coalesceLocation(LocNo); - } -} - -void LDVImpl:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { - UserValue *UV = lookupVirtReg(OldReg); - if (!UV) - return; - - if (TargetRegisterInfo::isVirtualRegister(NewReg)) - mapVirtReg(NewReg, UV); - if (OldReg != NewReg) - virtRegToEqClass.erase(OldReg); - - do { - UV->renameRegister(OldReg, NewReg, SubIdx, TRI); - UV = UV->getNext(); - } while (UV); -} - -void LiveDebugVariables:: -renameRegister(unsigned OldReg, unsigned NewReg, unsigned SubIdx) { - if (pImpl) - static_cast(pImpl)->renameRegister(OldReg, NewReg, SubIdx); -} - //===----------------------------------------------------------------------===// // Live Range Splitting //===----------------------------------------------------------------------===// @@ -1011,6 +977,7 @@ void LDVImpl::emitDebugValues(VirtRegMap *VRM) { userValues[i]->rewriteLocations(*VRM, *TRI); userValues[i]->emitDebugValues(VRM, *LIS, *TII); } + EmitDone = true; } void LiveDebugVariables::emitDebugValues(VirtRegMap *VRM) { diff --git a/lib/CodeGen/LiveInterval.cpp b/lib/CodeGen/LiveInterval.cpp index 8585cbb30dee..dccd847d070c 100644 --- a/lib/CodeGen/LiveInterval.cpp +++ b/lib/CodeGen/LiveInterval.cpp @@ -19,15 +19,15 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "RegisterCoalescer.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "RegisterCoalescer.h" #include using namespace llvm; @@ -440,7 +440,7 @@ void LiveInterval::join(LiveInterval &Other, iterator OutIt = begin(); OutIt->valno = NewVNInfo[LHSValNoAssignments[OutIt->valno->id]]; - for (iterator I = next(OutIt), E = end(); I != E; ++I) { + for (iterator I = llvm::next(OutIt), E = end(); I != E; ++I) { VNInfo* nextValNo = NewVNInfo[LHSValNoAssignments[I->valno->id]]; assert(nextValNo != 0 && "Huh?"); @@ -464,10 +464,12 @@ void LiveInterval::join(LiveInterval &Other, ranges.erase(OutIt, end()); } - // Remember assignements because val# ids are changing. - SmallVector OtherAssignments; + // Rewrite Other values before changing the VNInfo ids. + // This can leave Other in an invalid state because we're not coalescing + // touching segments that now have identical values. 
That's OK since Other is + // not supposed to be valid after calling join(); for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) - OtherAssignments.push_back(RHSValNoAssignments[I->valno->id]); + I->valno = NewVNInfo[RHSValNoAssignments[I->valno->id]]; // Update val# info. Renumber them and make sure they all belong to this // LiveInterval now. Also remove dead val#'s. @@ -486,148 +488,9 @@ void LiveInterval::join(LiveInterval &Other, valnos.resize(NumNewVals); // shrinkify // Okay, now insert the RHS live ranges into the LHS. - unsigned RangeNo = 0; - for (iterator I = Other.begin(), E = Other.end(); I != E; ++I, ++RangeNo) { - // Map the valno in the other live range to the current live range. - I->valno = NewVNInfo[OtherAssignments[RangeNo]]; - assert(I->valno && "Adding a dead range?"); - } - mergeIntervalRanges(Other); - - verify(); -} - -/// \brief Helper function for merging in another LiveInterval's ranges. -/// -/// This is a helper routine implementing an efficient merge of another -/// LiveIntervals ranges into the current interval. -/// -/// \param LHSValNo If non-NULL, set as the new value number for every range -/// from RHS which is merged into the LHS. -/// \param RHSValNo If non-NULL, then only ranges in RHS whose original value -/// number maches this value number will be merged into LHS. -void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, - VNInfo *LHSValNo, - const VNInfo *RHSValNo) { - if (RHS.empty()) - return; - - // Ensure we're starting with a valid range. Note that we don't verify RHS - // because it may have had its value numbers adjusted in preparation for - // merging. - verify(); - - // The strategy for merging these efficiently is as follows: - // - // 1) Find the beginning of the impacted ranges in the LHS. - // 2) Create a new, merged sub-squence of ranges merging from the position in - // #1 until either LHS or RHS is exhausted. Any part of LHS between RHS - // entries being merged will be copied into this new range. - // 3) Replace the relevant section in LHS with these newly merged ranges. - // 4) Append any remaning ranges from RHS if LHS is exhausted in #2. - // - // We don't follow the typical in-place merge strategy for sorted ranges of - // appending the new ranges to the back and then using std::inplace_merge - // because one step of the merge can both mutate the original elements and - // remove elements from the original. Essentially, because the merge includes - // collapsing overlapping ranges, a more complex approach is required. - - // We do an initial binary search to optimize for a common pattern: a large - // LHS, and a very small RHS. - const_iterator RI = RHS.begin(), RE = RHS.end(); - iterator LE = end(), LI = std::upper_bound(begin(), LE, *RI); - - // Merge into NewRanges until one of the ranges is exhausted. - SmallVector NewRanges; - - // Keep track of where to begin the replacement. - iterator ReplaceI = LI; - - // If there are preceding ranges in the LHS, put the last one into NewRanges - // so we can optionally extend it. Adjust the replacement point accordingly. - if (LI != begin()) { - ReplaceI = llvm::prior(LI); - NewRanges.push_back(*ReplaceI); - } - - // Now loop over the mergable portions of both LHS and RHS, merging into - // NewRanges. - while (LI != LE && RI != RE) { - // Skip incoming ranges with the wrong value. - if (RHSValNo && RI->valno != RHSValNo) { - ++RI; - continue; - } - - // Select the first range. We pick the earliest start point, and then the - // largest range. 
- LiveRange R = *LI; - if (*RI < R) { - R = *RI; - ++RI; - if (LHSValNo) - R.valno = LHSValNo; - } else { - ++LI; - } - - if (NewRanges.empty()) { - NewRanges.push_back(R); - continue; - } - - LiveRange &LastR = NewRanges.back(); - if (R.valno == LastR.valno) { - // Try to merge this range into the last one. - if (R.start <= LastR.end) { - LastR.end = std::max(LastR.end, R.end); - continue; - } - } else { - // We can't merge ranges across a value number. - assert(R.start >= LastR.end && - "Cannot overlap two LiveRanges with differing ValID's"); - } - - // If all else fails, just append the range. - NewRanges.push_back(R); - } - assert(RI == RE || LI == LE); - - // Check for being able to merge into the trailing sequence of ranges on the LHS. - if (!NewRanges.empty()) - for (; LI != LE && (LI->valno == NewRanges.back().valno && - LI->start <= NewRanges.back().end); - ++LI) - NewRanges.back().end = std::max(NewRanges.back().end, LI->end); - - // Replace the ranges in the LHS with the newly merged ones. It would be - // really nice if there were a move-supporting 'replace' directly in - // SmallVector, but as there is not, we pay the price of copies to avoid - // wasted memory allocations. - SmallVectorImpl::iterator NRI = NewRanges.begin(), - NRE = NewRanges.end(); - for (; ReplaceI != LI && NRI != NRE; ++ReplaceI, ++NRI) - *ReplaceI = *NRI; - if (NRI == NRE) - ranges.erase(ReplaceI, LI); - else - ranges.insert(LI, NRI, NRE); - - // And finally insert any trailing end of RHS (if we have one). - for (; RI != RE; ++RI) { - LiveRange R = *RI; - if (LHSValNo) - R.valno = LHSValNo; - if (!ranges.empty() && - ranges.back().valno == R.valno && R.start <= ranges.back().end) - ranges.back().end = std::max(ranges.back().end, R.end); - else - ranges.push_back(R); - } - - // Ensure we finished with a valid new sequence of ranges. - verify(); + LiveRangeUpdater Updater(this); + for (iterator I = Other.begin(), E = Other.end(); I != E; ++I) + Updater.add(*I); } /// MergeRangesInAsValue - Merge all of the intervals in RHS into this live @@ -636,7 +499,9 @@ void LiveInterval::mergeIntervalRanges(const LiveInterval &RHS, /// the overlapping LiveRanges have the specified value number. void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, VNInfo *LHSValNo) { - mergeIntervalRanges(RHS, LHSValNo); + LiveRangeUpdater Updater(this); + for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) + Updater.add(I->start, I->end, LHSValNo); } /// MergeValueInAsValue - Merge all of the live ranges of a specific val# @@ -647,7 +512,10 @@ void LiveInterval::MergeRangesInAsValue(const LiveInterval &RHS, void LiveInterval::MergeValueInAsValue(const LiveInterval &RHS, const VNInfo *RHSValNo, VNInfo *LHSValNo) { - mergeIntervalRanges(RHS, LHSValNo, RHSValNo); + LiveRangeUpdater Updater(this); + for (const_iterator I = RHS.begin(), E = RHS.end(); I != E; ++I) + if (I->valno == RHSValNo) + Updater.add(I->start, I->end, LHSValNo); } /// MergeValueNumberInto - This method is called when two value nubmers @@ -785,6 +653,206 @@ void LiveRange::print(raw_ostream &os) const { os << *this; } +//===----------------------------------------------------------------------===// +// LiveRangeUpdater class +//===----------------------------------------------------------------------===// +// +// The LiveRangeUpdater class always maintains these invariants: +// +// - When LastStart is invalid, Spills is empty and the iterators are invalid. +// This is the initial state, and the state created by flush(). 
+// In this state, isDirty() returns false. +// +// Otherwise, segments are kept in three separate areas: +// +// 1. [begin; WriteI) at the front of LI. +// 2. [ReadI; end) at the back of LI. +// 3. Spills. +// +// - LI.begin() <= WriteI <= ReadI <= LI.end(). +// - Segments in all three areas are fully ordered and coalesced. +// - Segments in area 1 precede and can't coalesce with segments in area 2. +// - Segments in Spills precede and can't coalesce with segments in area 2. +// - No coalescing is possible between segments in Spills and segments in area +// 1, and there are no overlapping segments. +// +// The segments in Spills are not ordered with respect to the segments in area +// 1. They need to be merged. +// +// When they exist, Spills.back().start <= LastStart, +// and WriteI[-1].start <= LastStart. + +void LiveRangeUpdater::print(raw_ostream &OS) const { + if (!isDirty()) { + if (LI) + OS << "Clean " << PrintReg(LI->reg) << " updater: " << *LI << '\n'; + else + OS << "Null updater.\n"; + return; + } + assert(LI && "Can't have null LI in dirty updater."); + OS << PrintReg(LI->reg) << " updater with gap = " << (ReadI - WriteI) + << ", last start = " << LastStart + << ":\n Area 1:"; + for (LiveInterval::const_iterator I = LI->begin(); I != WriteI; ++I) + OS << ' ' << *I; + OS << "\n Spills:"; + for (unsigned I = 0, E = Spills.size(); I != E; ++I) + OS << ' ' << Spills[I]; + OS << "\n Area 2:"; + for (LiveInterval::const_iterator I = ReadI, E = LI->end(); I != E; ++I) + OS << ' ' << *I; + OS << '\n'; +} + +void LiveRangeUpdater::dump() const +{ + print(errs()); +} + +// Determine if A and B should be coalesced. +static inline bool coalescable(const LiveRange &A, const LiveRange &B) { + assert(A.start <= B.start && "Unordered live ranges."); + if (A.end == B.start) + return A.valno == B.valno; + if (A.end < B.start) + return false; + assert(A.valno == B.valno && "Cannot overlap different values"); + return true; +} + +void LiveRangeUpdater::add(LiveRange Seg) { + assert(LI && "Cannot add to a null destination"); + + // Flush the state if Start moves backwards. + if (!LastStart.isValid() || LastStart > Seg.start) { + if (isDirty()) + flush(); + // This brings us to an uninitialized state. Reinitialize. + assert(Spills.empty() && "Leftover spilled segments"); + WriteI = ReadI = LI->begin(); + } + + // Remember start for next time. + LastStart = Seg.start; + + // Advance ReadI until it ends after Seg.start. + LiveInterval::iterator E = LI->end(); + if (ReadI != E && ReadI->end <= Seg.start) { + // First try to close the gap between WriteI and ReadI with spills. + if (ReadI != WriteI) + mergeSpills(); + // Then advance ReadI. + if (ReadI == WriteI) + ReadI = WriteI = LI->find(Seg.start); + else + while (ReadI != E && ReadI->end <= Seg.start) + *WriteI++ = *ReadI++; + } + + assert(ReadI == E || ReadI->end > Seg.start); + + // Check if the ReadI segment begins early. + if (ReadI != E && ReadI->start <= Seg.start) { + assert(ReadI->valno == Seg.valno && "Cannot overlap different values"); + // Bail if Seg is completely contained in ReadI. + if (ReadI->end >= Seg.end) + return; + // Coalesce into Seg. + Seg.start = ReadI->start; + ++ReadI; + } + + // Coalesce as much as possible from ReadI into Seg. + while (ReadI != E && coalescable(Seg, *ReadI)) { + Seg.end = std::max(Seg.end, ReadI->end); + ++ReadI; + } + + // Try coalescing Spills.back() into Seg. 
+ if (!Spills.empty() && coalescable(Spills.back(), Seg)) { + Seg.start = Spills.back().start; + Seg.end = std::max(Spills.back().end, Seg.end); + Spills.pop_back(); + } + + // Try coalescing Seg into WriteI[-1]. + if (WriteI != LI->begin() && coalescable(WriteI[-1], Seg)) { + WriteI[-1].end = std::max(WriteI[-1].end, Seg.end); + return; + } + + // Seg doesn't coalesce with anything, and needs to be inserted somewhere. + if (WriteI != ReadI) { + *WriteI++ = Seg; + return; + } + + // Finally, append to LI or Spills. + if (WriteI == E) { + LI->ranges.push_back(Seg); + WriteI = ReadI = LI->ranges.end(); + } else + Spills.push_back(Seg); +} + +// Merge as many spilled segments as possible into the gap between WriteI +// and ReadI. Advance WriteI to reflect the inserted instructions. +void LiveRangeUpdater::mergeSpills() { + // Perform a backwards merge of Spills and [SpillI;WriteI). + size_t GapSize = ReadI - WriteI; + size_t NumMoved = std::min(Spills.size(), GapSize); + LiveInterval::iterator Src = WriteI; + LiveInterval::iterator Dst = Src + NumMoved; + LiveInterval::iterator SpillSrc = Spills.end(); + LiveInterval::iterator B = LI->begin(); + + // This is the new WriteI position after merging spills. + WriteI = Dst; + + // Now merge Src and Spills backwards. + while (Src != Dst) { + if (Src != B && Src[-1].start > SpillSrc[-1].start) + *--Dst = *--Src; + else + *--Dst = *--SpillSrc; + } + assert(NumMoved == size_t(Spills.end() - SpillSrc)); + Spills.erase(SpillSrc, Spills.end()); +} + +void LiveRangeUpdater::flush() { + if (!isDirty()) + return; + // Clear the dirty state. + LastStart = SlotIndex(); + + assert(LI && "Cannot add to a null destination"); + + // Nothing to merge? + if (Spills.empty()) { + LI->ranges.erase(WriteI, ReadI); + LI->verify(); + return; + } + + // Resize the WriteI - ReadI gap to match Spills. + size_t GapSize = ReadI - WriteI; + if (GapSize < Spills.size()) { + // The gap is too small. Make some room. + size_t WritePos = WriteI - LI->begin(); + LI->ranges.insert(ReadI, Spills.size() - GapSize, LiveRange()); + // This also invalidated ReadI, but it is recomputed below. + WriteI = LI->ranges.begin() + WritePos; + } else { + // Shrink the gap if necessary. + LI->ranges.erase(WriteI + Spills.size(), ReadI); + } + ReadI = WriteI + Spills.size(); + mergeSpills(); + LI->verify(); +} + unsigned ConnectedVNInfoEqClasses::Classify(const LiveInterval *LI) { // Create initial equivalence classes. 
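The LiveRangeUpdater added above replaces the removed mergeIntervalRanges(): callers stream segments into add() in typically increasing order and the updater keeps the interval sorted and coalesced, buffering out-of-place segments in Spills and reconciling them in mergeSpills()/flush(). The net effect can be pictured with this much smaller sketch, which merges a segment straight into a sorted vector (Seg and addSegment() are illustrative, not the real LiveRange/LiveInterval types, and the real class avoids the per-segment erase/insert cost shown here):

#include <algorithm>
#include <iostream>
#include <vector>

// A very reduced model of a live-range segment: [Start, End) with a value id.
struct Seg {
  unsigned Start, End, ValNo;
};

// Insert a segment into a sorted, coalesced vector, merging it with any
// neighbours of the same value that touch or overlap it.
static void addSegment(std::vector<Seg> &Ranges, Seg S) {
  // Find the first segment that does not end strictly before S starts.
  auto I = std::lower_bound(Ranges.begin(), Ranges.end(), S,
                            [](const Seg &A, const Seg &B) {
                              return A.End < B.Start;
                            });
  // Grow S over every mergeable neighbour and drop those neighbours.
  while (I != Ranges.end() && I->Start <= S.End && I->ValNo == S.ValNo) {
    S.Start = std::min(S.Start, I->Start);
    S.End = std::max(S.End, I->End);
    I = Ranges.erase(I);
  }
  Ranges.insert(I, S);
}

int main() {
  std::vector<Seg> Ranges;
  addSegment(Ranges, {10, 20, 0});
  addSegment(Ranges, {30, 40, 0});
  addSegment(Ranges, {20, 30, 0}); // touches both: everything coalesces
  for (const Seg &S : Ranges)
    std::cout << '[' << S.Start << ',' << S.End << "):" << S.ValNo << "\n";
  // Prints a single segment: [10,40):0
}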
EqClass.clear(); diff --git a/lib/CodeGen/LiveIntervalAnalysis.cpp b/lib/CodeGen/LiveIntervalAnalysis.cpp index 4e75d892e523..f1b839481131 100644 --- a/lib/CodeGen/LiveIntervalAnalysis.cpp +++ b/lib/CodeGen/LiveIntervalAnalysis.cpp @@ -17,34 +17,29 @@ #define DEBUG_TYPE "regalloc" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/Value.h" +#include "LiveRangeCalc.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Value.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/STLExtras.h" -#include "LiveRangeCalc.h" -#include "VirtRegMap.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include -#include #include +#include using namespace llvm; -// Switch to the new experimental algorithm for computing live intervals. -static cl::opt -NewLiveIntervals("new-live-intervals", cl::Hidden, - cl::desc("Use new algorithm forcomputing live intervals")); - char LiveIntervals::ID = 0; char &llvm::LiveIntervalsID = LiveIntervals::ID; INITIALIZE_PASS_BEGIN(LiveIntervals, "liveintervals", @@ -60,6 +55,9 @@ void LiveIntervals::getAnalysisUsage(AnalysisUsage &AU) const { AU.setPreservesCFG(); AU.addRequired(); AU.addPreserved(); + // LiveVariables isn't really required by this analysis, it is only required + // here to make sure it is live during TwoAddressInstructionPass and + // PHIElimination. This is temporary. AU.addRequired(); AU.addPreserved(); AU.addPreservedID(MachineLoopInfoID); @@ -105,7 +103,6 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); AA = &getAnalysis(); - LV = &getAnalysis(); Indexes = &getAnalysis(); DomTree = &getAnalysis(); if (!LRCalc) @@ -114,16 +111,8 @@ bool LiveIntervals::runOnMachineFunction(MachineFunction &fn) { // Allocate space for all virtual registers. VirtRegIntervals.resize(MRI->getNumVirtRegs()); - if (NewLiveIntervals) { - // This is the new way of computing live intervals. - // It is independent of LiveVariables, and it can run at any time. - computeVirtRegs(); - computeRegMasks(); - } else { - // This is the old way of computing live intervals. - // It depends on LiveVariables. 
- computeIntervals(); - } + computeVirtRegs(); + computeRegMasks(); computeLiveInRegUnits(); DEBUG(dump()); @@ -165,298 +154,6 @@ void LiveIntervals::dumpInstrs() const { } #endif -static -bool MultipleDefsBySameMI(const MachineInstr &MI, unsigned MOIdx) { - unsigned Reg = MI.getOperand(MOIdx).getReg(); - for (unsigned i = MOIdx+1, e = MI.getNumOperands(); i < e; ++i) { - const MachineOperand &MO = MI.getOperand(i); - if (!MO.isReg()) - continue; - if (MO.getReg() == Reg && MO.isDef()) { - assert(MI.getOperand(MOIdx).getSubReg() != MO.getSubReg() && - MI.getOperand(MOIdx).getSubReg() && - (MO.getSubReg() || MO.isImplicit())); - return true; - } - } - return false; -} - -/// isPartialRedef - Return true if the specified def at the specific index is -/// partially re-defining the specified live interval. A common case of this is -/// a definition of the sub-register. -bool LiveIntervals::isPartialRedef(SlotIndex MIIdx, MachineOperand &MO, - LiveInterval &interval) { - if (!MO.getSubReg() || MO.isEarlyClobber()) - return false; - - SlotIndex RedefIndex = MIIdx.getRegSlot(); - const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); - MachineInstr *DefMI = getInstructionFromIndex(OldLR->valno->def); - if (DefMI != 0) { - return DefMI->findRegisterDefOperandIdx(interval.reg) != -1; - } - return false; -} - -void LiveIntervals::handleVirtualRegisterDef(MachineBasicBlock *mbb, - MachineBasicBlock::iterator mi, - SlotIndex MIIdx, - MachineOperand& MO, - unsigned MOIdx, - LiveInterval &interval) { - DEBUG(dbgs() << "\t\tregister: " << PrintReg(interval.reg, TRI)); - - // Virtual registers may be defined multiple times (due to phi - // elimination and 2-addr elimination). Much of what we do only has to be - // done once for the vreg. We use an empty interval to detect the first - // time we see a vreg. - LiveVariables::VarInfo& vi = LV->getVarInfo(interval.reg); - if (interval.empty()) { - // Get the Idx of the defining instructions. - SlotIndex defIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - - // Make sure the first definition is not a partial redefinition. - assert(!MO.readsReg() && "First def cannot also read virtual register " - "missing flag?"); - - VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); - assert(ValNo->id == 0 && "First value in interval is not 0?"); - - // Loop over all of the blocks that the vreg is defined in. There are - // two cases we have to handle here. The most common case is a vreg - // whose lifetime is contained within a basic block. In this case there - // will be a single kill, in MBB, which comes after the definition. - if (vi.Kills.size() == 1 && vi.Kills[0]->getParent() == mbb) { - // FIXME: what about dead vars? - SlotIndex killIdx; - if (vi.Kills[0] != mi) - killIdx = getInstructionIndex(vi.Kills[0]).getRegSlot(); - else - killIdx = defIndex.getDeadSlot(); - - // If the kill happens after the definition, we have an intra-block - // live range. - if (killIdx > defIndex) { - assert(vi.AliveBlocks.empty() && - "Shouldn't be alive across any blocks!"); - LiveRange LR(defIndex, killIdx, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR << "\n"); - return; - } - } - - // The other case we handle is when a virtual register lives to the end - // of the defining block, potentially live across some blocks, then is - // live into some number of blocks, but gets killed. Start by adding a - // range that goes from this definition to the end of the defining block. 
- LiveRange NewLR(defIndex, getMBBEndIdx(mbb), ValNo); - DEBUG(dbgs() << " +" << NewLR); - interval.addRange(NewLR); - - bool PHIJoin = LV->isPHIJoin(interval.reg); - - if (PHIJoin) { - // A phi join register is killed at the end of the MBB and revived as a - // new valno in the killing blocks. - assert(vi.AliveBlocks.empty() && "Phi join can't pass through blocks"); - DEBUG(dbgs() << " phi-join"); - } else { - // Iterate over all of the blocks that the variable is completely - // live in, adding [insrtIndex(begin), instrIndex(end)+4) to the - // live interval. - for (SparseBitVector<>::iterator I = vi.AliveBlocks.begin(), - E = vi.AliveBlocks.end(); I != E; ++I) { - MachineBasicBlock *aliveBlock = MF->getBlockNumbered(*I); - LiveRange LR(getMBBStartIdx(aliveBlock), getMBBEndIdx(aliveBlock), - ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); - } - } - - // Finally, this virtual register is live from the start of any killing - // block to the 'use' slot of the killing instruction. - for (unsigned i = 0, e = vi.Kills.size(); i != e; ++i) { - MachineInstr *Kill = vi.Kills[i]; - SlotIndex Start = getMBBStartIdx(Kill->getParent()); - SlotIndex killIdx = getInstructionIndex(Kill).getRegSlot(); - - // Create interval with one of a NEW value number. Note that this value - // number isn't actually defined by an instruction, weird huh? :) - if (PHIJoin) { - assert(getInstructionFromIndex(Start) == 0 && - "PHI def index points at actual instruction."); - ValNo = interval.getNextValue(Start, VNInfoAllocator); - } - LiveRange LR(Start, killIdx, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " +" << LR); - } - - } else { - if (MultipleDefsBySameMI(*mi, MOIdx)) - // Multiple defs of the same virtual register by the same instruction. - // e.g. %reg1031:5, %reg1031:6 = VLD1q16 %reg1024, ... - // This is likely due to elimination of REG_SEQUENCE instructions. Return - // here since there is nothing to do. - return; - - // If this is the second time we see a virtual register definition, it - // must be due to phi elimination or two addr elimination. If this is - // the result of two address elimination, then the vreg is one of the - // def-and-use register operand. - - // It may also be partial redef like this: - // 80 %reg1041:6 = VSHRNv4i16 %reg1034, 12, pred:14, pred:%reg0 - // 120 %reg1041:5 = VSHRNv4i16 %reg1039, 12, pred:14, pred:%reg0 - bool PartReDef = isPartialRedef(MIIdx, MO, interval); - if (PartReDef || mi->isRegTiedToUseOperand(MOIdx)) { - // If this is a two-address definition, then we have already processed - // the live range. The only problem is that we didn't realize there - // are actually two values in the live interval. Because of this we - // need to take the LiveRegion that defines this register and split it - // into two values. - SlotIndex RedefIndex = MIIdx.getRegSlot(MO.isEarlyClobber()); - - const LiveRange *OldLR = - interval.getLiveRangeContaining(RedefIndex.getRegSlot(true)); - VNInfo *OldValNo = OldLR->valno; - SlotIndex DefIndex = OldValNo->def.getRegSlot(); - - // Delete the previous value, which should be short and continuous, - // because the 2-addr copy must be in the same MBB as the redef. - interval.removeRange(DefIndex, RedefIndex); - - // The new value number (#1) is defined by the instruction we claimed - // defined value #0. - VNInfo *ValNo = interval.createValueCopy(OldValNo, VNInfoAllocator); - - // Value#0 is now defined by the 2-addr instruction. 
- OldValNo->def = RedefIndex; - - // Add the new live interval which replaces the range for the input copy. - LiveRange LR(DefIndex, RedefIndex, ValNo); - DEBUG(dbgs() << " replace range with " << LR); - interval.addRange(LR); - - // If this redefinition is dead, we need to add a dummy unit live - // range covering the def slot. - if (MO.isDead()) - interval.addRange(LiveRange(RedefIndex, RedefIndex.getDeadSlot(), - OldValNo)); - - DEBUG(dbgs() << " RESULT: " << interval); - } else if (LV->isPHIJoin(interval.reg)) { - // In the case of PHI elimination, each variable definition is only - // live until the end of the block. We've already taken care of the - // rest of the live range. - - SlotIndex defIndex = MIIdx.getRegSlot(); - if (MO.isEarlyClobber()) - defIndex = MIIdx.getRegSlot(true); - - VNInfo *ValNo = interval.getNextValue(defIndex, VNInfoAllocator); - - SlotIndex killIndex = getMBBEndIdx(mbb); - LiveRange LR(defIndex, killIndex, ValNo); - interval.addRange(LR); - DEBUG(dbgs() << " phi-join +" << LR); - } else { - llvm_unreachable("Multiply defined register"); - } - } - - DEBUG(dbgs() << '\n'); -} - -void LiveIntervals::handleRegisterDef(MachineBasicBlock *MBB, - MachineBasicBlock::iterator MI, - SlotIndex MIIdx, - MachineOperand& MO, - unsigned MOIdx) { - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) - handleVirtualRegisterDef(MBB, MI, MIIdx, MO, MOIdx, - getOrCreateInterval(MO.getReg())); -} - -/// computeIntervals - computes the live intervals for virtual -/// registers. for some ordering of the machine instructions [1,N] a -/// live interval is an interval [i, j) where 1 <= i <= j < N for -/// which a variable is live -void LiveIntervals::computeIntervals() { - DEBUG(dbgs() << "********** COMPUTING LIVE INTERVALS **********\n" - << "********** Function: " << MF->getName() << '\n'); - - RegMaskBlocks.resize(MF->getNumBlockIDs()); - - SmallVector UndefUses; - for (MachineFunction::iterator MBBI = MF->begin(), E = MF->end(); - MBBI != E; ++MBBI) { - MachineBasicBlock *MBB = MBBI; - RegMaskBlocks[MBB->getNumber()].first = RegMaskSlots.size(); - - if (MBB->empty()) - continue; - - // Track the index of the current machine instr. - SlotIndex MIIndex = getMBBStartIdx(MBB); - DEBUG(dbgs() << "BB#" << MBB->getNumber() - << ":\t\t# derived from " << MBB->getName() << "\n"); - - // Skip over empty initial indices. - if (getInstructionFromIndex(MIIndex) == 0) - MIIndex = Indexes->getNextNonNullIndex(MIIndex); - - for (MachineBasicBlock::iterator MI = MBB->begin(), miEnd = MBB->end(); - MI != miEnd; ++MI) { - DEBUG(dbgs() << MIIndex << "\t" << *MI); - if (MI->isDebugValue()) - continue; - assert(Indexes->getInstructionFromIndex(MIIndex) == MI && - "Lost SlotIndex synchronization"); - - // Handle defs. - for (int i = MI->getNumOperands() - 1; i >= 0; --i) { - MachineOperand &MO = MI->getOperand(i); - - // Collect register masks. - if (MO.isRegMask()) { - RegMaskSlots.push_back(MIIndex.getRegSlot()); - RegMaskBits.push_back(MO.getRegMask()); - continue; - } - - if (!MO.isReg() || !TargetRegisterInfo::isVirtualRegister(MO.getReg())) - continue; - - // handle register defs - build intervals - if (MO.isDef()) - handleRegisterDef(MBB, MI, MIIndex, MO, i); - else if (MO.isUndef()) - UndefUses.push_back(MO.getReg()); - } - - // Move to the next instr slot. - MIIndex = Indexes->getNextNonNullIndex(MIIndex); - } - - // Compute the number of register mask instructions in this block. 
- std::pair &RMB = RegMaskBlocks[MBB->getNumber()]; - RMB.second = RegMaskSlots.size() - RMB.first; - } - - // Create empty intervals for registers defined by implicit_def's (except - // for those implicit_def that define values which are liveout of their - // blocks. - for (unsigned i = 0, e = UndefUses.size(); i != e; ++i) { - unsigned UndefReg = UndefUses[i]; - (void)getOrCreateInterval(UndefReg); - } -} - LiveInterval* LiveIntervals::createInterval(unsigned reg) { float Weight = TargetRegisterInfo::isPhysicalRegister(reg) ? HUGE_VALF : 0.0F; return new LiveInterval(reg, Weight); @@ -1275,9 +972,9 @@ private: // Return the last use of reg between NewIdx and OldIdx. SlotIndex findLastUseBefore(unsigned Reg) { - SlotIndex LastUse = NewIdx; if (TargetRegisterInfo::isVirtualRegister(Reg)) { + SlotIndex LastUse = NewIdx; for (MachineRegisterInfo::use_nodbg_iterator UI = MRI.use_nodbg_begin(Reg), UE = MRI.use_nodbg_end(); @@ -1287,26 +984,42 @@ private: if (InstSlot > LastUse && InstSlot < OldIdx) LastUse = InstSlot; } - } else { - MachineInstr* MI = LIS.getSlotIndexes()->getInstructionFromIndex(NewIdx); - MachineBasicBlock::iterator MII(MI); - ++MII; - MachineBasicBlock* MBB = MI->getParent(); - for (; MII != MBB->end() && LIS.getInstructionIndex(MII) < OldIdx; ++MII){ - for (MachineInstr::mop_iterator MOI = MII->operands_begin(), - MOE = MII->operands_end(); - MOI != MOE; ++MOI) { - const MachineOperand& mop = *MOI; - if (!mop.isReg() || mop.getReg() == 0 || - TargetRegisterInfo::isVirtualRegister(mop.getReg())) - continue; - - if (TRI.hasRegUnit(mop.getReg(), Reg)) - LastUse = LIS.getInstructionIndex(MII); - } - } + return LastUse; } - return LastUse; + + // This is a regunit interval, so scanning the use list could be very + // expensive. Scan upwards from OldIdx instead. + assert(NewIdx < OldIdx && "Expected upwards move"); + SlotIndexes *Indexes = LIS.getSlotIndexes(); + MachineBasicBlock *MBB = Indexes->getMBBFromIndex(NewIdx); + + // OldIdx may not correspond to an instruction any longer, so set MII to + // point to the next instruction after OldIdx, or MBB->end(). + MachineBasicBlock::iterator MII = MBB->end(); + if (MachineInstr *MI = Indexes->getInstructionFromIndex( + Indexes->getNextNonNullIndex(OldIdx))) + if (MI->getParent() == MBB) + MII = MI; + + MachineBasicBlock::iterator Begin = MBB->begin(); + while (MII != Begin) { + if ((--MII)->isDebugValue()) + continue; + SlotIndex Idx = Indexes->getInstructionIndex(MII); + + // Stop searching when NewIdx is reached. + if (!SlotIndex::isEarlierInstr(NewIdx, Idx)) + return NewIdx; + + // Check if MII uses Reg. + for (MIBundleOperands MO(MII); MO.isValid(); ++MO) + if (MO->isReg() && + TargetRegisterInfo::isPhysicalRegister(MO->getReg()) && + TRI.hasRegUnit(MO->getReg(), Reg)) + return Idx; + } + // Didn't reach NewIdx. It must be the first instruction in the block. + return NewIdx; } }; @@ -1331,3 +1044,129 @@ void LiveIntervals::handleMoveIntoBundle(MachineInstr* MI, HMEditor HME(*this, *MRI, *TRI, OldIndex, NewIndex, UpdateFlags); HME.updateAllRanges(MI); } + +void +LiveIntervals::repairIntervalsInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End, + ArrayRef OrigRegs) { + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. 
+ while (Begin != MBB->begin() && !Indexes->hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !Indexes->hasIndex(End)) + ++End; + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB).getPrevSlot(); + else + endIdx = getInstructionIndex(End); + + Indexes->repairIndexesInRange(MBB, Begin, End); + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + for (MachineInstr::const_mop_iterator MOI = MI->operands_begin(), + MOE = MI->operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg() && + TargetRegisterInfo::isVirtualRegister(MOI->getReg()) && + !hasInterval(MOI->getReg())) { + LiveInterval &LI = getOrCreateInterval(MOI->getReg()); + computeVirtRegInterval(&LI); + } + } + } + + for (unsigned i = 0, e = OrigRegs.size(); i != e; ++i) { + unsigned Reg = OrigRegs[i]; + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + LiveInterval &LI = getInterval(Reg); + // FIXME: Should we support undefs that gain defs? + if (!LI.hasAtLeastOneValue()) + continue; + + LiveInterval::iterator LII = LI.find(endIdx); + SlotIndex lastUseIdx; + if (LII != LI.end() && LII->start < endIdx) + lastUseIdx = LII->end; + else + --LII; + + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (MI->isDebugValue()) + continue; + + SlotIndex instrIdx = getInstructionIndex(MI); + bool isStartValid = getInstructionFromIndex(LII->start); + bool isEndValid = getInstructionFromIndex(LII->end); + + // FIXME: This doesn't currently handle early-clobber or multiple removed + // defs inside of the region to repair. + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + const MachineOperand &MO = *OI; + if (!MO.isReg() || MO.getReg() != Reg) + continue; + + if (MO.isDef()) { + if (!isStartValid) { + if (LII->end.isDead()) { + SlotIndex prevStart; + if (LII != LI.begin()) + prevStart = llvm::prior(LII)->start; + + // FIXME: This could be more efficient if there was a removeRange + // method that returned an iterator. + LI.removeRange(*LII, true); + if (prevStart.isValid()) + LII = LI.find(prevStart); + else + LII = LI.begin(); + } else { + LII->start = instrIdx.getRegSlot(); + LII->valno->def = instrIdx.getRegSlot(); + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + continue; + } + } + + if (!lastUseIdx.isValid()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), instrIdx.getDeadSlot(), VNI); + LII = LI.addRange(LR); + } else if (LII->start != instrIdx.getRegSlot()) { + VNInfo *VNI = LI.getNextValue(instrIdx.getRegSlot(), + VNInfoAllocator); + LiveRange LR(instrIdx.getRegSlot(), lastUseIdx, VNI); + LII = LI.addRange(LR); + } + + if (MO.getSubReg() && !MO.isUndef()) + lastUseIdx = instrIdx.getRegSlot(); + else + lastUseIdx = SlotIndex(); + } else if (MO.isUse()) { + // FIXME: This should probably be handled outside of this branch, + // either as part of the def case (for defs inside of the region) or + // after the loop over the region. 
+ if (!isEndValid && !LII->end.isBlock()) + LII->end = instrIdx.getRegSlot(); + if (!lastUseIdx.isValid()) + lastUseIdx = instrIdx.getRegSlot(); + } + } + } + } +} diff --git a/lib/CodeGen/LiveIntervalUnion.cpp b/lib/CodeGen/LiveIntervalUnion.cpp index dadd02bfc654..d5a81a311c64 100644 --- a/lib/CodeGen/LiveIntervalUnion.cpp +++ b/lib/CodeGen/LiveIntervalUnion.cpp @@ -14,13 +14,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "LiveIntervalUnion.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" #include "llvm/ADT/SparseBitVector.h" -#include "llvm/CodeGen/MachineLoopRanges.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetRegisterInfo.h" - #include using namespace llvm; @@ -182,33 +180,6 @@ collectInterferingVRegs(unsigned MaxInterferingRegs) { return InterferingVRegs.size(); } -bool LiveIntervalUnion::Query::checkLoopInterference(MachineLoopRange *Loop) { - // VirtReg is likely live throughout the loop, so start by checking LIU-Loop - // overlaps. - IntervalMapOverlaps - Overlaps(LiveUnion->getMap(), Loop->getMap()); - if (!Overlaps.valid()) - return false; - - // The loop is overlapping an LIU assignment. Check VirtReg as well. - LiveInterval::iterator VRI = VirtReg->find(Overlaps.start()); - - for (;;) { - if (VRI == VirtReg->end()) - return false; - if (VRI->start < Overlaps.stop()) - return true; - - Overlaps.advanceTo(VRI->start); - if (!Overlaps.valid()) - return false; - if (Overlaps.start() < VRI->end) - return true; - - VRI = VirtReg->advanceTo(VRI, Overlaps.start()); - } -} - void LiveIntervalUnion::Array::init(LiveIntervalUnion::Allocator &Alloc, unsigned NSize) { // Reuse existing allocation. diff --git a/lib/CodeGen/LiveIntervalUnion.h b/lib/CodeGen/LiveIntervalUnion.h deleted file mode 100644 index 4d41fca85ad3..000000000000 --- a/lib/CodeGen/LiveIntervalUnion.h +++ /dev/null @@ -1,210 +0,0 @@ -//===-- LiveIntervalUnion.h - Live interval union data struct --*- C++ -*--===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// LiveIntervalUnion is a union of live segments across multiple live virtual -// registers. This may be used during coalescing to represent a congruence -// class, or during register allocation to model liveness of a physical -// register. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_LIVEINTERVALUNION -#define LLVM_CODEGEN_LIVEINTERVALUNION - -#include "llvm/ADT/IntervalMap.h" -#include "llvm/CodeGen/LiveInterval.h" - -namespace llvm { - -class MachineLoopRange; -class TargetRegisterInfo; - -#ifndef NDEBUG -// forward declaration -template class SparseBitVector; -typedef SparseBitVector<128> LiveVirtRegBitSet; -#endif - -/// Compare a live virtual register segment to a LiveIntervalUnion segment. -inline bool -overlap(const LiveRange &VRSeg, - const IntervalMap::const_iterator &LUSeg) { - return VRSeg.start < LUSeg.stop() && LUSeg.start() < VRSeg.end; -} - -/// Union of live intervals that are strong candidates for coalescing into a -/// single register (either physical or virtual depending on the context). We -/// expect the constituent live intervals to be disjoint, although we may -/// eventually make exceptions to handle value-based interference. 
-class LiveIntervalUnion { - // A set of live virtual register segments that supports fast insertion, - // intersection, and removal. - // Mapping SlotIndex intervals to virtual register numbers. - typedef IntervalMap LiveSegments; - -public: - // SegmentIter can advance to the next segment ordered by starting position - // which may belong to a different live virtual register. We also must be able - // to reach the current segment's containing virtual register. - typedef LiveSegments::iterator SegmentIter; - - // LiveIntervalUnions share an external allocator. - typedef LiveSegments::Allocator Allocator; - - class Query; - -private: - unsigned Tag; // unique tag for current contents. - LiveSegments Segments; // union of virtual reg segments - -public: - explicit LiveIntervalUnion(Allocator &a) : Tag(0), Segments(a) {} - - // Iterate over all segments in the union of live virtual registers ordered - // by their starting position. - SegmentIter begin() { return Segments.begin(); } - SegmentIter end() { return Segments.end(); } - SegmentIter find(SlotIndex x) { return Segments.find(x); } - bool empty() const { return Segments.empty(); } - SlotIndex startIndex() const { return Segments.start(); } - - // Provide public access to the underlying map to allow overlap iteration. - typedef LiveSegments Map; - const Map &getMap() { return Segments; } - - /// getTag - Return an opaque tag representing the current state of the union. - unsigned getTag() const { return Tag; } - - /// changedSince - Return true if the union change since getTag returned tag. - bool changedSince(unsigned tag) const { return tag != Tag; } - - // Add a live virtual register to this union and merge its segments. - void unify(LiveInterval &VirtReg); - - // Remove a live virtual register's segments from this union. - void extract(LiveInterval &VirtReg); - - // Remove all inserted virtual registers. - void clear() { Segments.clear(); ++Tag; } - - // Print union, using TRI to translate register names - void print(raw_ostream &OS, const TargetRegisterInfo *TRI) const; - -#ifndef NDEBUG - // Verify the live intervals in this union and add them to the visited set. - void verify(LiveVirtRegBitSet& VisitedVRegs); -#endif - - /// Query interferences between a single live virtual register and a live - /// interval union. - class Query { - LiveIntervalUnion *LiveUnion; - LiveInterval *VirtReg; - LiveInterval::iterator VirtRegI; // current position in VirtReg - SegmentIter LiveUnionI; // current position in LiveUnion - SmallVector InterferingVRegs; - bool CheckedFirstInterference; - bool SeenAllInterferences; - bool SeenUnspillableVReg; - unsigned Tag, UserTag; - - public: - Query(): LiveUnion(), VirtReg(), Tag(0), UserTag(0) {} - - Query(LiveInterval *VReg, LiveIntervalUnion *LIU): - LiveUnion(LIU), VirtReg(VReg), CheckedFirstInterference(false), - SeenAllInterferences(false), SeenUnspillableVReg(false) - {} - - void clear() { - LiveUnion = NULL; - VirtReg = NULL; - InterferingVRegs.clear(); - CheckedFirstInterference = false; - SeenAllInterferences = false; - SeenUnspillableVReg = false; - Tag = 0; - UserTag = 0; - } - - void init(unsigned UTag, LiveInterval *VReg, LiveIntervalUnion *LIU) { - assert(VReg && LIU && "Invalid arguments"); - if (UserTag == UTag && VirtReg == VReg && - LiveUnion == LIU && !LIU->changedSince(Tag)) { - // Retain cached results, e.g. firstInterference. 
- return; - } - clear(); - LiveUnion = LIU; - VirtReg = VReg; - Tag = LIU->getTag(); - UserTag = UTag; - } - - LiveInterval &virtReg() const { - assert(VirtReg && "uninitialized"); - return *VirtReg; - } - - // Does this live virtual register interfere with the union? - bool checkInterference() { return collectInterferingVRegs(1); } - - // Count the virtual registers in this union that interfere with this - // query's live virtual register, up to maxInterferingRegs. - unsigned collectInterferingVRegs(unsigned MaxInterferingRegs = UINT_MAX); - - // Was this virtual register visited during collectInterferingVRegs? - bool isSeenInterference(LiveInterval *VReg) const; - - // Did collectInterferingVRegs collect all interferences? - bool seenAllInterferences() const { return SeenAllInterferences; } - - // Did collectInterferingVRegs encounter an unspillable vreg? - bool seenUnspillableVReg() const { return SeenUnspillableVReg; } - - // Vector generated by collectInterferingVRegs. - const SmallVectorImpl &interferingVRegs() const { - return InterferingVRegs; - } - - /// checkLoopInterference - Return true if there is interference overlapping - /// Loop. - bool checkLoopInterference(MachineLoopRange*); - - private: - Query(const Query&) LLVM_DELETED_FUNCTION; - void operator=(const Query&) LLVM_DELETED_FUNCTION; - }; - - // Array of LiveIntervalUnions. - class Array { - unsigned Size; - LiveIntervalUnion *LIUs; - public: - Array() : Size(0), LIUs(0) {} - ~Array() { clear(); } - - // Initialize the array to have Size entries. - // Reuse an existing allocation if the size matches. - void init(LiveIntervalUnion::Allocator&, unsigned Size); - - unsigned size() const { return Size; } - - void clear(); - - LiveIntervalUnion& operator[](unsigned idx) { - assert(idx < Size && "idx out of bounds"); - return LIUs[idx]; - } - }; -}; - -} // end namespace llvm - -#endif // !defined(LLVM_CODEGEN_LIVEINTERVALUNION) diff --git a/lib/CodeGen/LiveRangeCalc.cpp b/lib/CodeGen/LiveRangeCalc.cpp index c3ff4f1b6d2e..dede490d91ba 100644 --- a/lib/CodeGen/LiveRangeCalc.cpp +++ b/lib/CodeGen/LiveRangeCalc.cpp @@ -18,10 +18,11 @@ using namespace llvm; -void LiveRangeCalc::reset(const MachineFunction *MF, +void LiveRangeCalc::reset(const MachineFunction *mf, SlotIndexes *SI, MachineDominatorTree *MDT, VNInfo::Allocator *VNIA) { + MF = mf; MRI = &MF->getRegInfo(); Indexes = SI; DomTree = MDT; @@ -104,28 +105,28 @@ void LiveRangeCalc::extendToUses(LiveInterval *LI, unsigned Reg) { // Transfer information from the LiveIn vector to the live ranges. -void LiveRangeCalc::updateLiveIns(VNInfo *OverrideVNI) { +void LiveRangeCalc::updateLiveIns() { + LiveRangeUpdater Updater; for (SmallVectorImpl::iterator I = LiveIn.begin(), E = LiveIn.end(); I != E; ++I) { if (!I->DomNode) continue; MachineBasicBlock *MBB = I->DomNode->getBlock(); - - VNInfo *VNI = OverrideVNI ? OverrideVNI : I->Value; - assert(VNI && "No live-in value found"); - + assert(I->Value && "No live-in value found"); SlotIndex Start, End; tie(Start, End) = Indexes->getMBBRange(MBB); if (I->Kill.isValid()) - I->LI->addRange(LiveRange(Start, I->Kill, VNI)); + // Value is killed inside this block. + End = I->Kill; else { - I->LI->addRange(LiveRange(Start, End, VNI)); - // The value is live-through, update LiveOut as well. Defer the Domtree - // lookup until it is needed. + // The value is live-through, update LiveOut as well. + // Defer the Domtree lookup until it is needed. 
assert(Seen.test(MBB->getNumber())); - LiveOut[MBB] = LiveOutPair(VNI, (MachineDomTreeNode *)0); + LiveOut[MBB] = LiveOutPair(I->Value, (MachineDomTreeNode *)0); } + Updater.setDest(I->LI); + Updater.add(Start, End, I->Value); } LiveIn.clear(); } @@ -150,13 +151,11 @@ void LiveRangeCalc::extend(LiveInterval *LI, // multiple values, and we may need to create even more phi-defs to preserve // VNInfo SSA form. Perform a search for all predecessor blocks where we // know the dominating VNInfo. - VNInfo *VNI = findReachingDefs(LI, KillMBB, Kill, PhysReg); + if (findReachingDefs(LI, KillMBB, Kill, PhysReg)) + return; // When there were multiple different values, we may need new PHIs. - if (!VNI) - updateSSA(); - - updateLiveIns(VNI); + calculateValues(); } @@ -167,16 +166,18 @@ void LiveRangeCalc::calculateValues() { assert(Indexes && "Missing SlotIndexes"); assert(DomTree && "Missing dominator tree"); updateSSA(); - updateLiveIns(0); + updateLiveIns(); } -VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg) { - // Blocks where LI should be live-in. - SmallVector WorkList(1, KillMBB); +bool LiveRangeCalc::findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg) { + unsigned KillMBBNum = KillMBB->getNumber(); + + // Block numbers where LI should be live-in. + SmallVector WorkList(1, KillMBBNum); // Remember if we have seen more than one value. bool UniqueVNI = true; @@ -184,7 +185,7 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // Using Seen as a visited set, perform a BFS for all reaching defs. for (unsigned i = 0; i != WorkList.size(); ++i) { - MachineBasicBlock *MBB = WorkList[i]; + MachineBasicBlock *MBB = MF->getBlockNumbered(WorkList[i]); #ifndef NDEBUG if (MBB->pred_empty()) { @@ -231,25 +232,50 @@ VNInfo *LiveRangeCalc::findReachingDefs(LiveInterval *LI, // No, we need a live-in value for Pred as well if (Pred != KillMBB) - WorkList.push_back(Pred); + WorkList.push_back(Pred->getNumber()); else // Loopback to KillMBB, so value is really live through. Kill = SlotIndex(); } } - // Transfer WorkList to LiveInBlocks in reverse order. - // This ordering works best with updateSSA(). LiveIn.clear(); - LiveIn.reserve(WorkList.size()); - while(!WorkList.empty()) - addLiveInBlock(LI, DomTree->getNode(WorkList.pop_back_val())); - // The kill block may not be live-through. - assert(LiveIn.back().DomNode->getBlock() == KillMBB); - LiveIn.back().Kill = Kill; + // Both updateSSA() and LiveRangeUpdater benefit from ordered blocks, but + // neither require it. Skip the sorting overhead for small updates. + if (WorkList.size() > 4) + array_pod_sort(WorkList.begin(), WorkList.end()); + + // If a unique reaching def was found, blit in the live ranges immediately. + if (UniqueVNI) { + LiveRangeUpdater Updater(LI); + for (SmallVectorImpl::const_iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + SlotIndex Start, End; + tie(Start, End) = Indexes->getMBBRange(*I); + // Trim the live range in KillMBB. + if (*I == KillMBBNum && Kill.isValid()) + End = Kill; + else + LiveOut[MF->getBlockNumbered(*I)] = + LiveOutPair(TheVNI, (MachineDomTreeNode *)0); + Updater.add(Start, End, TheVNI); + } + return true; + } + + // Multiple values were found, so transfer the work list to the LiveIn array + // where UpdateSSA will use it as a work list. 
+ LiveIn.reserve(WorkList.size()); + for (SmallVectorImpl::const_iterator + I = WorkList.begin(), E = WorkList.end(); I != E; ++I) { + MachineBasicBlock *MBB = MF->getBlockNumbered(*I); + addLiveInBlock(LI, DomTree->getNode(MBB)); + if (MBB == KillMBB) + LiveIn.back().Kill = Kill; + } - return UniqueVNI ? TheVNI : 0; + return false; } diff --git a/lib/CodeGen/LiveRangeCalc.h b/lib/CodeGen/LiveRangeCalc.h index 909829b22851..57cab7b34220 100644 --- a/lib/CodeGen/LiveRangeCalc.h +++ b/lib/CodeGen/LiveRangeCalc.h @@ -34,6 +34,7 @@ template class DomTreeNodeBase; typedef DomTreeNodeBase MachineDomTreeNode; class LiveRangeCalc { + const MachineFunction *MF; const MachineRegisterInfo *MRI; SlotIndexes *Indexes; MachineDominatorTree *DomTree; @@ -100,17 +101,20 @@ class LiveRangeCalc { /// used to add entries directly. SmallVector LiveIn; - /// findReachingDefs - Assuming that LI is live-in to KillMBB and killed at - /// Kill, search for values that can reach KillMBB. All blocks that need LI - /// to be live-in are added to LiveIn. If a unique reaching def is found, - /// its value is returned, if Kill is jointly dominated by multiple values, - /// NULL is returned. + /// Assuming that LI is live-in to KillMBB and killed at Kill, find the set + /// of defs that can reach it. + /// + /// If only one def can reach Kill, all paths from the def to kill are added + /// to LI, and the function returns true. + /// + /// If multiple values can reach Kill, the blocks that need LI to be live in + /// are added to the LiveIn array, and the function returns false. /// /// PhysReg, when set, is used to verify live-in lists on basic blocks. - VNInfo *findReachingDefs(LiveInterval *LI, - MachineBasicBlock *KillMBB, - SlotIndex Kill, - unsigned PhysReg); + bool findReachingDefs(LiveInterval *LI, + MachineBasicBlock *KillMBB, + SlotIndex Kill, + unsigned PhysReg); /// updateSSA - Compute the values that will be live in to all requested /// blocks in LiveIn. Create PHI-def values as required to preserve SSA form. @@ -119,12 +123,11 @@ class LiveRangeCalc { /// blocks. No values are read from the live ranges. void updateSSA(); - /// updateLiveIns - Add liveness as specified in the LiveIn vector, using VNI - /// as a wildcard value for LiveIn entries without a value. - void updateLiveIns(VNInfo *VNI); + /// Add liveness as specified in the LiveIn vector. + void updateLiveIns(); public: - LiveRangeCalc() : MRI(0), Indexes(0), DomTree(0), Alloc(0) {} + LiveRangeCalc() : MF(0), MRI(0), Indexes(0), DomTree(0), Alloc(0) {} //===--------------------------------------------------------------------===// // High-level interface. 
diff --git a/lib/CodeGen/LiveRangeEdit.cpp b/lib/CodeGen/LiveRangeEdit.cpp index f8fbc7ddf0c1..7793e96c3540 100644 --- a/lib/CodeGen/LiveRangeEdit.cpp +++ b/lib/CodeGen/LiveRangeEdit.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "VirtRegMap.h" +#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; @@ -77,7 +77,7 @@ bool LiveRangeEdit::anyRematerializable(AliasAnalysis *aa) { /// OrigIdx are also available with the same value at UseIdx. bool LiveRangeEdit::allUsesAvailableAt(const MachineInstr *OrigMI, SlotIndex OrigIdx, - SlotIndex UseIdx) { + SlotIndex UseIdx) const { OrigIdx = OrigIdx.getRegSlot(true); UseIdx = UseIdx.getRegSlot(true); for (unsigned i = 0, e = OrigMI->getNumOperands(); i != e; ++i) { diff --git a/lib/CodeGen/LiveRegMatrix.cpp b/lib/CodeGen/LiveRegMatrix.cpp index 7f22478d01cd..0ef069f47827 100644 --- a/lib/CodeGen/LiveRegMatrix.cpp +++ b/lib/CodeGen/LiveRegMatrix.cpp @@ -12,16 +12,16 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "LiveRegMatrix.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "RegisterCoalescer.h" -#include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/LiveRegMatrix.h b/lib/CodeGen/LiveRegMatrix.h deleted file mode 100644 index 8f22c24478f4..000000000000 --- a/lib/CodeGen/LiveRegMatrix.h +++ /dev/null @@ -1,148 +0,0 @@ -//===-- LiveRegMatrix.h - Track register interference ---------*- C++ -*---===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// The LiveRegMatrix analysis pass keeps track of virtual register interference -// along two dimensions: Slot indexes and register units. The matrix is used by -// register allocators to ensure that no interfering virtual registers get -// assigned to overlapping physical registers. -// -// Register units are defined in MCRegisterInfo.h, they represent the smallest -// unit of interference when dealing with overlapping physical registers. The -// LiveRegMatrix is represented as a LiveIntervalUnion per register unit. When -// a virtual register is assigned to a physical register, the live range for -// the virtual register is inserted into the LiveIntervalUnion for each regunit -// in the physreg. 
-// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_LIVEREGMATRIX_H -#define LLVM_CODEGEN_LIVEREGMATRIX_H - -#include "LiveIntervalUnion.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/CodeGen/MachineFunctionPass.h" - -namespace llvm { - -class LiveInterval; -class LiveIntervalAnalysis; -class MachineRegisterInfo; -class TargetRegisterInfo; -class VirtRegMap; - -class LiveRegMatrix : public MachineFunctionPass { - const TargetRegisterInfo *TRI; - MachineRegisterInfo *MRI; - LiveIntervals *LIS; - VirtRegMap *VRM; - - // UserTag changes whenever virtual registers have been modified. - unsigned UserTag; - - // The matrix is represented as a LiveIntervalUnion per register unit. - LiveIntervalUnion::Allocator LIUAlloc; - LiveIntervalUnion::Array Matrix; - - // Cached queries per register unit. - OwningArrayPtr Queries; - - // Cached register mask interference info. - unsigned RegMaskTag; - unsigned RegMaskVirtReg; - BitVector RegMaskUsable; - - // MachineFunctionPass boilerplate. - virtual void getAnalysisUsage(AnalysisUsage&) const; - virtual bool runOnMachineFunction(MachineFunction&); - virtual void releaseMemory(); -public: - static char ID; - LiveRegMatrix(); - - //===--------------------------------------------------------------------===// - // High-level interface. - //===--------------------------------------------------------------------===// - // - // Check for interference before assigning virtual registers to physical - // registers. - // - - /// Invalidate cached interference queries after modifying virtual register - /// live ranges. Interference checks may return stale information unless - /// caches are invalidated. - void invalidateVirtRegs() { ++UserTag; } - - enum InterferenceKind { - /// No interference, go ahead and assign. - IK_Free = 0, - - /// Virtual register interference. There are interfering virtual registers - /// assigned to PhysReg or its aliases. This interference could be resolved - /// by unassigning those other virtual registers. - IK_VirtReg, - - /// Register unit interference. A fixed live range is in the way, typically - /// argument registers for a call. This can't be resolved by unassigning - /// other virtual registers. - IK_RegUnit, - - /// RegMask interference. The live range is crossing an instruction with a - /// regmask operand that doesn't preserve PhysReg. This typically means - /// VirtReg is live across a call, and PhysReg isn't call-preserved. - IK_RegMask - }; - - /// Check for interference before assigning VirtReg to PhysReg. - /// If this function returns IK_Free, it is legal to assign(VirtReg, PhysReg). - /// When there is more than one kind of interference, the InterferenceKind - /// with the highest enum value is returned. - InterferenceKind checkInterference(LiveInterval &VirtReg, unsigned PhysReg); - - /// Assign VirtReg to PhysReg. - /// This will mark VirtReg's live range as occupied in the LiveRegMatrix and - /// update VirtRegMap. The live range is expected to be available in PhysReg. - void assign(LiveInterval &VirtReg, unsigned PhysReg); - - /// Unassign VirtReg from its PhysReg. - /// Assuming that VirtReg was previously assigned to a PhysReg, this undoes - /// the assignment and updates VirtRegMap accordingly. - void unassign(LiveInterval &VirtReg); - - //===--------------------------------------------------------------------===// - // Low-level interface. 
- //===--------------------------------------------------------------------===// - // - // Provide access to the underlying LiveIntervalUnions. - // - - /// Check for regmask interference only. - /// Return true if VirtReg crosses a regmask operand that clobbers PhysReg. - /// If PhysReg is null, check if VirtReg crosses any regmask operands. - bool checkRegMaskInterference(LiveInterval &VirtReg, unsigned PhysReg = 0); - - /// Check for regunit interference only. - /// Return true if VirtReg overlaps a fixed assignment of one of PhysRegs's - /// register units. - bool checkRegUnitInterference(LiveInterval &VirtReg, unsigned PhysReg); - - /// Query a line of the assigned virtual register matrix directly. - /// Use MCRegUnitIterator to enumerate all regunits in the desired PhysReg. - /// This returns a reference to an internal Query data structure that is only - /// valid until the next query() call. - LiveIntervalUnion::Query &query(LiveInterval &VirtReg, unsigned RegUnit); - - /// Directly access the live interval unions per regunit. - /// This returns an array indexed by the regunit number. - LiveIntervalUnion *getLiveUnions() { return &Matrix[0]; } -}; - -} // end namespace llvm - -#endif // LLVM_CODEGEN_LIVEREGMATRIX_H diff --git a/lib/CodeGen/LiveStackAnalysis.cpp b/lib/CodeGen/LiveStackAnalysis.cpp index f0b522bd7d36..be11a8fa86ef 100644 --- a/lib/CodeGen/LiveStackAnalysis.cpp +++ b/lib/CodeGen/LiveStackAnalysis.cpp @@ -15,12 +15,12 @@ #define DEBUG_TYPE "livestacks" #include "llvm/CodeGen/LiveStackAnalysis.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; diff --git a/lib/CodeGen/LiveVariables.cpp b/lib/CodeGen/LiveVariables.cpp index 6ea933d4304b..789eddc42774 100644 --- a/lib/CodeGen/LiveVariables.cpp +++ b/lib/CodeGen/LiveVariables.cpp @@ -27,17 +27,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/LiveVariables.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/STLExtras.h" #include using namespace llvm; @@ -619,29 +619,6 @@ bool LiveVariables::runOnMachineFunction(MachineFunction &mf) { MBB); } - // Finally, if the last instruction in the block is a return, make sure to - // mark it as using all of the live-out values in the function. - // Things marked both call and return are tail calls; do not do this for - // them. The tail callee need not take the same registers as input - // that it produces as output, and there are dependencies for its input - // registers elsewhere. 
- if (!MBB->empty() && MBB->back().isReturn() - && !MBB->back().isCall()) { - MachineInstr *Ret = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MF->getRegInfo().liveout_begin(), - E = MF->getRegInfo().liveout_end(); I != E; ++I) { - assert(TargetRegisterInfo::isPhysicalRegister(*I) && - "Cannot have a live-out virtual register!"); - HandlePhysRegUse(*I, Ret); - - // Add live-out registers as implicit uses. - if (!Ret->readsRegister(*I)) - Ret->addOperand(MachineOperand::CreateReg(*I, false, true)); - } - } - // MachineCSE may CSE instructions which write to non-allocatable physical // registers across MBBs. Remember if any reserved register is liveout. SmallSet LiveOuts; diff --git a/lib/CodeGen/LocalStackSlotAllocation.cpp b/lib/CodeGen/LocalStackSlotAllocation.cpp index fbc9e20517c2..352ef942591f 100644 --- a/lib/CodeGen/LocalStackSlotAllocation.cpp +++ b/lib/CodeGen/LocalStackSlotAllocation.cpp @@ -15,26 +15,26 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "localstackalloc" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/CodeGen/MachineBasicBlock.cpp b/lib/CodeGen/MachineBasicBlock.cpp index 18d021d521d6..898e165feeab 100644 --- a/lib/CodeGen/MachineBasicBlock.cpp +++ b/lib/CodeGen/MachineBasicBlock.cpp @@ -12,24 +12,26 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineBasicBlock.h" -#include "llvm/BasicBlock.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Assembly/Writer.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SlotIndexes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/DataLayout.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Assembly/Writer.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Support/Debug.h" #include "llvm/Support/LeakDetector.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" 
#include using namespace llvm; @@ -663,6 +665,13 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { << " -- BB#" << NMBB->getNumber() << " -- BB#" << Succ->getNumber() << '\n'); + LiveIntervals *LIS = P->getAnalysisIfAvailable(); + SlotIndexes *Indexes = P->getAnalysisIfAvailable(); + if (LIS) + LIS->insertMBBInMaps(NMBB); + else if (Indexes) + Indexes->insertMBBInMaps(NMBB); + // On some targets like Mips, branches may kill virtual registers. Make sure // that LiveVariables is properly updated after updateTerminator replaces the // terminators. @@ -689,14 +698,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { } } + SmallVector UsedRegs; + if (LIS) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) { + MachineInstr *MI = I; + + for (MachineInstr::mop_iterator OI = MI->operands_begin(), + OE = MI->operands_end(); OI != OE; ++OI) { + if (!OI->isReg() || OI->getReg() == 0) + continue; + + unsigned Reg = OI->getReg(); + if (std::find(UsedRegs.begin(), UsedRegs.end(), Reg) == UsedRegs.end()) + UsedRegs.push_back(Reg); + } + } + } + ReplaceUsesOfBlockWith(Succ, NMBB); + + // If updateTerminator() removes instructions, we need to remove them from + // SlotIndexes. + SmallVector Terminators; + if (Indexes) { + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + Terminators.push_back(I); + } + updateTerminator(); + if (Indexes) { + SmallVector NewTerminators; + for (instr_iterator I = getFirstInstrTerminator(), E = instr_end(); + I != E; ++I) + NewTerminators.push_back(I); + + for (SmallVectorImpl::iterator I = Terminators.begin(), + E = Terminators.end(); I != E; ++I) { + if (std::find(NewTerminators.begin(), NewTerminators.end(), *I) == + NewTerminators.end()) + Indexes->removeMachineInstrFromMaps(*I); + } + } + // Insert unconditional "jump Succ" instruction in NMBB if necessary. NMBB->addSuccessor(Succ); if (!NMBB->isLayoutSuccessor(Succ)) { Cond.clear(); MF->getTarget().getInstrInfo()->InsertBranch(*NMBB, Succ, NULL, Cond, dl); + + if (Indexes) { + for (instr_iterator I = NMBB->instr_begin(), E = NMBB->instr_end(); + I != E; ++I) { + // Some instructions may have been moved to NMBB by updateTerminator(), + // so we first remove any instruction that already has an index. + if (Indexes->hasIndex(I)) + Indexes->removeMachineInstrFromMaps(I); + Indexes->insertMachineInstrInMaps(I); + } + } } // Fix PHI nodes in Succ so they refer to NMBB instead of this @@ -731,6 +793,67 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { LV->addNewBlock(NMBB, this, Succ); } + if (LIS) { + // After splitting the edge and updating SlotIndexes, live intervals may be + // in one of two situations, depending on whether this block was the last in + // the function. If the original block was the last in the function, all live + // intervals will end prior to the beginning of the new split block. If the + // original block was not at the end of the function, all live intervals will + // extend to the end of the new split block. + + bool isLastMBB = + llvm::next(MachineFunction::iterator(NMBB)) == getParent()->end(); + + SlotIndex StartIndex = Indexes->getMBBEndIdx(this); + SlotIndex PrevIndex = StartIndex.getPrevSlot(); + SlotIndex EndIndex = Indexes->getMBBEndIdx(NMBB); + + // Find the registers used from NMBB in PHIs in Succ. 
+ SmallSet PHISrcRegs; + for (MachineBasicBlock::instr_iterator + I = Succ->instr_begin(), E = Succ->instr_end(); + I != E && I->isPHI(); ++I) { + for (unsigned ni = 1, ne = I->getNumOperands(); ni != ne; ni += 2) { + if (I->getOperand(ni+1).getMBB() == NMBB) { + MachineOperand &MO = I->getOperand(ni); + unsigned Reg = MO.getReg(); + PHISrcRegs.insert(Reg); + if (MO.isUndef()) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && "PHI sources should be live out of their predecessors."); + LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } + } + } + + MachineRegisterInfo *MRI = &getParent()->getRegInfo(); + for (unsigned i = 0, e = MRI->getNumVirtRegs(); i != e; ++i) { + unsigned Reg = TargetRegisterInfo::index2VirtReg(i); + if (PHISrcRegs.count(Reg) || !LIS->hasInterval(Reg)) + continue; + + LiveInterval &LI = LIS->getInterval(Reg); + if (!LI.liveAt(PrevIndex)) + continue; + + bool isLiveOut = LI.liveAt(LIS->getMBBStartIdx(Succ)); + if (isLiveOut && isLastMBB) { + VNInfo *VNI = LI.getVNInfoAt(PrevIndex); + assert(VNI && "LiveInterval should have VNInfo where it is live."); + LI.addRange(LiveRange(StartIndex, EndIndex, VNI)); + } else if (!isLiveOut && !isLastMBB) { + LI.removeRange(StartIndex, EndIndex); + } + } + + // Update all intervals for registers whose uses may have been modified by + // updateTerminator(). + LIS->repairIntervalsInRange(this, getFirstTerminator(), end(), UsedRegs); + } + if (MachineDominatorTree *MDT = P->getAnalysisIfAvailable()) { // Update dominator information. @@ -788,40 +911,42 @@ MachineBasicBlock::SplitCriticalEdge(MachineBasicBlock *Succ, Pass *P) { return NMBB; } -MachineBasicBlock::iterator -MachineBasicBlock::erase(MachineBasicBlock::iterator I) { - if (I->isBundle()) { - MachineBasicBlock::iterator E = llvm::next(I); - return Insts.erase(I.getInstrIterator(), E.getInstrIterator()); - } - - return Insts.erase(I.getInstrIterator()); +/// Prepare MI to be removed from its bundle. This fixes bundle flags on MI's +/// neighboring instructions so the bundle won't be broken by removing MI. +static void unbundleSingleMI(MachineInstr *MI) { + // Removing the first instruction in a bundle. + if (MI->isBundledWithSucc() && !MI->isBundledWithPred()) + MI->unbundleFromSucc(); + // Removing the last instruction in a bundle. + if (MI->isBundledWithPred() && !MI->isBundledWithSucc()) + MI->unbundleFromPred(); + // If MI is not bundled, or if it is internal to a bundle, the neighbor flags + // are already fine. 
} -MachineInstr *MachineBasicBlock::remove(MachineInstr *I) { - if (I->isBundle()) { - instr_iterator MII = llvm::next(I); - iterator E = end(); - while (MII != E && MII->isInsideBundle()) { - MachineInstr *MI = &*MII++; - Insts.remove(MI); - } - } +MachineBasicBlock::instr_iterator +MachineBasicBlock::erase(MachineBasicBlock::instr_iterator I) { + unbundleSingleMI(I); + return Insts.erase(I); +} - return Insts.remove(I); +MachineInstr *MachineBasicBlock::remove_instr(MachineInstr *MI) { + unbundleSingleMI(MI); + MI->clearFlag(MachineInstr::BundledPred); + MI->clearFlag(MachineInstr::BundledSucc); + return Insts.remove(MI); } -void MachineBasicBlock::splice(MachineBasicBlock::iterator where, - MachineBasicBlock *Other, - MachineBasicBlock::iterator From) { - if (From->isBundle()) { - MachineBasicBlock::iterator To = llvm::next(From); - Insts.splice(where.getInstrIterator(), Other->Insts, - From.getInstrIterator(), To.getInstrIterator()); - return; +MachineBasicBlock::instr_iterator +MachineBasicBlock::insert(instr_iterator I, MachineInstr *MI) { + assert(!MI->isBundledWithPred() && !MI->isBundledWithSucc() && + "Cannot insert instruction with bundle flags"); + // Set the bundle flags when inserting inside a bundle. + if (I != instr_end() && I->isBundledWithPred()) { + MI->setFlag(MachineInstr::BundledPred); + MI->setFlag(MachineInstr::BundledSucc); } - - Insts.splice(where.getInstrIterator(), Other->Insts, From.getInstrIterator()); + return Insts.insert(I, MI); } /// removeFromParent - This method unlinks 'this' from the containing function, @@ -982,7 +1107,6 @@ MachineBasicBlock::LivenessQueryResult MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, unsigned Reg, MachineInstr *MI, unsigned Neighborhood) { - unsigned N = Neighborhood; MachineBasicBlock *MBB = MI->getParent(); @@ -997,14 +1121,18 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, MachineOperandIteratorBase::PhysRegInfo Analysis = MIOperands(I).analyzePhysReg(Reg, TRI); - if (Analysis.Kills) + if (Analysis.Defines) + // Outputs happen after inputs so they take precedence if both are + // present. + return Analysis.DefinesDead ? LQR_Dead : LQR_Live; + + if (Analysis.Kills || Analysis.Clobbers) // Register killed, so isn't live. return LQR_Dead; - else if (Analysis.DefinesOverlap || Analysis.ReadsOverlap) + else if (Analysis.ReadsOverlap) // Defined or read without a previous kill - live. - return (Analysis.Defines || Analysis.Reads) ? - LQR_Live : LQR_OverlappingLive; + return Analysis.Reads ? LQR_Live : LQR_OverlappingLive; } while (I != MBB->begin() && --N > 0); } @@ -1036,7 +1164,7 @@ MachineBasicBlock::computeRegisterLiveness(const TargetRegisterInfo *TRI, return (Analysis.Reads) ? LQR_Live : LQR_OverlappingLive; - else if (Analysis.DefinesOverlap) + else if (Analysis.Clobbers || Analysis.Defines) // Defined (but not read) therefore cannot have been live. 
return LQR_Dead; } diff --git a/lib/CodeGen/MachineBlockFrequencyInfo.cpp b/lib/CodeGen/MachineBlockFrequencyInfo.cpp index a079d6e59139..070daf2e2ba2 100644 --- a/lib/CodeGen/MachineBlockFrequencyInfo.cpp +++ b/lib/CodeGen/MachineBlockFrequencyInfo.cpp @@ -11,11 +11,11 @@ // //===----------------------------------------------------------------------===// -#include "llvm/InitializePasses.h" -#include "llvm/Analysis/BlockFrequencyImpl.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/Analysis/BlockFrequencyImpl.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/InitializePasses.h" using namespace llvm; diff --git a/lib/CodeGen/MachineBlockPlacement.cpp b/lib/CodeGen/MachineBlockPlacement.cpp index cd3f19944e46..cd948e24a6b2 100644 --- a/lib/CodeGen/MachineBlockPlacement.cpp +++ b/lib/CodeGen/MachineBlockPlacement.cpp @@ -26,6 +26,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "block-placement2" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" @@ -33,13 +38,8 @@ #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLowering.h" #include @@ -171,7 +171,7 @@ class MachineBlockPlacement : public MachineFunctionPass { const TargetInstrInfo *TII; /// \brief A handle to the target's lowering info. - const TargetLowering *TLI; + const TargetLoweringBase *TLI; /// \brief Allocator and owner of BlockChain structures. /// @@ -1013,8 +1013,8 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { // exclusively on the loop info here so that we can align backedges in // unnatural CFGs and backedges that were introduced purely because of the // loop rotations done during this layout pass. - if (F.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize)) + if (F.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) return; unsigned Align = TLI->getPrefLoopAlignment(); if (!Align) @@ -1061,7 +1061,7 @@ void MachineBlockPlacement::buildCFGChains(MachineFunction &F) { } // Align this block if the layout predecessor's edge into this block is - // cold relative to the block. When this is true, othe predecessors make up + // cold relative to the block. When this is true, other predecessors make up // all of the hot entries into the block and thus alignment is likely to be // important. 
BranchProbability LayoutProb = MBPI->getEdgeProbability(LayoutPred, *BI); diff --git a/lib/CodeGen/MachineBranchProbabilityInfo.cpp b/lib/CodeGen/MachineBranchProbabilityInfo.cpp index 447921147f03..ae70912b6c69 100644 --- a/lib/CodeGen/MachineBranchProbabilityInfo.cpp +++ b/lib/CodeGen/MachineBranchProbabilityInfo.cpp @@ -11,9 +11,9 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Instructions.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" diff --git a/lib/CodeGen/MachineCSE.cpp b/lib/CodeGen/MachineCSE.cpp index dbc41defeb5a..61d8d384cd38 100644 --- a/lib/CodeGen/MachineCSE.cpp +++ b/lib/CodeGen/MachineCSE.cpp @@ -15,17 +15,17 @@ #define DEBUG_TYPE "machine-cse" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/ScopedHashTable.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/RecyclingAllocator.h" +#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; STATISTIC(NumCoalesces, "Number of copies coalesced"); @@ -126,8 +126,6 @@ bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI, // deleted. continue; MachineInstr *DefMI = MRI->getVRegDef(Reg); - if (DefMI->getParent() != MBB) - continue; if (!DefMI->isCopy()) continue; unsigned SrcReg = DefMI->getOperand(1).getReg(); diff --git a/lib/CodeGen/MachineCopyPropagation.cpp b/lib/CodeGen/MachineCopyPropagation.cpp index 4a793281b2cd..dc8a2241c7e1 100644 --- a/lib/CodeGen/MachineCopyPropagation.cpp +++ b/lib/CodeGen/MachineCopyPropagation.cpp @@ -13,19 +13,19 @@ #define DEBUG_TYPE "codegen-cp" #include "llvm/CodeGen/Passes.h" -#include "llvm/Pass.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Pass.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; STATISTIC(NumDeletes, "Number of dead copies deleted"); @@ -33,6 +33,7 @@ STATISTIC(NumDeletes, "Number of dead copies deleted"); namespace { class MachineCopyPropagation : public MachineFunctionPass { const TargetRegisterInfo *TRI; + const TargetInstrInfo *TII; MachineRegisterInfo *MRI; public: @@ -51,6 +52,7 @@ namespace { SourceMap &SrcMap, DenseMap &AvailCopyMap); bool CopyPropagateBlock(MachineBasicBlock &MBB); + void removeCopy(MachineInstr *MI); }; } char MachineCopyPropagation::ID = 0; @@ -124,6 +126,16 @@ static bool isNopCopy(MachineInstr *CopyMI, unsigned Def, unsigned Src, 
return false; } +// Remove MI from the function because it has been determined it is dead. +// Turn it into a noop KILL instruction if it has super-register liveness +// adjustments. +void MachineCopyPropagation::removeCopy(MachineInstr *MI) { + if (MI->getNumOperands() == 2) + MI->eraseFromParent(); + else + MI->setDesc(TII->get(TargetOpcode::KILL)); +} + bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { SmallSetVector MaybeDeadCopies; // Candidates for deletion DenseMap AvailCopyMap; // Def -> available copies map @@ -169,7 +181,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { for (MachineBasicBlock::iterator I = CopyMI, E = MI; I != E; ++I) I->clearRegisterKills(Def, TRI); - MI->eraseFromParent(); + removeCopy(MI); Changed = true; ++NumDeletes; continue; @@ -262,7 +274,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { unsigned Reg = (*DI)->getOperand(0).getReg(); if (MRI->isReserved(Reg) || !MaskMO.clobbersPhysReg(Reg)) continue; - (*DI)->eraseFromParent(); + removeCopy(*DI); Changed = true; ++NumDeletes; } @@ -298,7 +310,7 @@ bool MachineCopyPropagation::CopyPropagateBlock(MachineBasicBlock &MBB) { DI = MaybeDeadCopies.begin(), DE = MaybeDeadCopies.end(); DI != DE; ++DI) { if (!MRI->isReserved((*DI)->getOperand(0).getReg())) { - (*DI)->eraseFromParent(); + removeCopy(*DI); Changed = true; ++NumDeletes; } @@ -312,6 +324,7 @@ bool MachineCopyPropagation::runOnMachineFunction(MachineFunction &MF) { bool Changed = false; TRI = MF.getTarget().getRegisterInfo(); + TII = MF.getTarget().getInstrInfo(); MRI = &MF.getRegInfo(); for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) diff --git a/lib/CodeGen/MachineFunction.cpp b/lib/CodeGen/MachineFunction.cpp index 91d521185767..04321f329282 100644 --- a/lib/CodeGen/MachineFunction.cpp +++ b/lib/CodeGen/MachineFunction.cpp @@ -14,28 +14,28 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Analysis/ConstantFolding.h" #include "llvm/CodeGen/MachineConstantPool.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCContext.h" -#include "llvm/Analysis/ConstantFolding.h" #include "llvm/Support/Debug.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -58,14 +58,17 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, else RegInfo = 0; MFInfo = 0; - FrameInfo = new (Allocator) 
MachineFrameInfo(*TM.getFrameLowering()); - if (Fn->getFnAttributes().hasAttribute(Attributes::StackAlignment)) + FrameInfo = new (Allocator) MachineFrameInfo(*TM.getFrameLowering(), + TM.Options.RealignStack); + if (Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackAlignment)) FrameInfo->ensureMaxAlignment(Fn->getAttributes(). - getFnAttributes().getStackAlignment()); + getStackAlignment(AttributeSet::FunctionIndex)); ConstantPool = new (Allocator) MachineConstantPool(TM.getDataLayout()); Alignment = TM.getTargetLowering()->getMinFunctionAlignment(); // FIXME: Shouldn't use pref alignment if explicit alignment is set on Fn. - if (!Fn->getFnAttributes().hasAttribute(Attributes::OptimizeForSize)) + if (!Fn->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize)) Alignment = std::max(Alignment, TM.getTargetLowering()->getPrefFunctionAlignment()); FunctionNumber = FunctionNum; @@ -73,8 +76,15 @@ MachineFunction::MachineFunction(const Function *F, const TargetMachine &TM, } MachineFunction::~MachineFunction() { - BasicBlocks.clear(); + // Don't call destructors on MachineInstr and MachineOperand. All of their + // memory comes from the BumpPtrAllocator which is about to be purged. + // + // Do call MachineBasicBlock destructors, it contains std::vectors. + for (iterator I = begin(), E = end(); I != E; I = BasicBlocks.erase(I)) + I->Insts.clearAndLeakNodesUnsafely(); + InstructionRecycler.clear(Allocator); + OperandRecycler.clear(Allocator); BasicBlockRecycler.clear(Allocator); if (RegInfo) { RegInfo->~MachineRegisterInfo(); @@ -157,7 +167,7 @@ MachineInstr * MachineFunction::CreateMachineInstr(const MCInstrDesc &MCID, DebugLoc DL, bool NoImp) { return new (InstructionRecycler.Allocate(Allocator)) - MachineInstr(MCID, DL, NoImp); + MachineInstr(*this, MCID, DL, NoImp); } /// CloneMachineInstr - Create a new MachineInstr which is a copy of the @@ -172,9 +182,17 @@ MachineFunction::CloneMachineInstr(const MachineInstr *Orig) { /// DeleteMachineInstr - Delete the given MachineInstr. /// +/// This function also serves as the MachineInstr destructor - the real +/// ~MachineInstr() destructor must be empty. void MachineFunction::DeleteMachineInstr(MachineInstr *MI) { - MI->~MachineInstr(); + // Strip it for parts. The operand array and the MI object itself are + // independently recyclable. + if (MI->Operands) + deallocateOperandArray(MI->CapOperands, MI->Operands); + // Don't call ~MachineInstr() which must be trivial anyway because + // ~MachineFunction drops whole lists of MachineInstrs without calling their + // destructors. InstructionRecycler.Deallocate(Allocator, MI); } @@ -328,13 +346,6 @@ void MachineFunction::print(raw_ostream &OS, SlotIndexes *Indexes) const { } OS << '\n'; } - if (RegInfo && !RegInfo->liveout_empty()) { - OS << "Function Live Outs:"; - for (MachineRegisterInfo::liveout_iterator - I = RegInfo->liveout_begin(), E = RegInfo->liveout_end(); I != E; ++I) - OS << ' ' << PrintReg(*I, TRI); - OS << '\n'; - } for (const_iterator BB = begin(), E = end(); BB != E; ++BB) { OS << '\n'; @@ -445,6 +456,70 @@ MCSymbol *MachineFunction::getPICBaseSymbol() const { // MachineFrameInfo implementation //===----------------------------------------------------------------------===// +/// ensureMaxAlignment - Make sure the function is at least Align bytes +/// aligned.
+void MachineFrameInfo::ensureMaxAlignment(unsigned Align) { + if (!TFI.isStackRealignable() || !RealignOption) + assert(Align <= TFI.getStackAlignment() && + "For targets without stack realignment, Align is out of limit!"); + if (MaxAlignment < Align) MaxAlignment = Align; +} + +/// clampStackAlignment - Clamp the alignment if requested and emit a warning. +static inline unsigned clampStackAlignment(bool ShouldClamp, unsigned Align, + unsigned StackAlign) { + if (!ShouldClamp || Align <= StackAlign) + return Align; + DEBUG(dbgs() << "Warning: requested alignment " << Align + << " exceeds the stack alignment " << StackAlign + << " when stack realignment is off" << '\n'); + return StackAlign; +} + +/// CreateStackObject - Create a new statically sized stack object, returning +/// a nonnegative identifier to represent it. +/// +int MachineFrameInfo::CreateStackObject(uint64_t Size, unsigned Alignment, + bool isSS, bool MayNeedSP, const AllocaInst *Alloca) { + assert(Size != 0 && "Cannot allocate zero size stack objects!"); + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); + Objects.push_back(StackObject(Size, Alignment, 0, false, isSS, MayNeedSP, + Alloca)); + int Index = (int)Objects.size() - NumFixedObjects - 1; + assert(Index >= 0 && "Bad frame index!"); + ensureMaxAlignment(Alignment); + return Index; +} + +/// CreateSpillStackObject - Create a new statically sized stack object that +/// represents a spill slot, returning a nonnegative identifier to represent +/// it. +/// +int MachineFrameInfo::CreateSpillStackObject(uint64_t Size, + unsigned Alignment) { + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); + CreateStackObject(Size, Alignment, true, false); + int Index = (int)Objects.size() - NumFixedObjects - 1; + ensureMaxAlignment(Alignment); + return Index; +} + +/// CreateVariableSizedObject - Notify the MachineFrameInfo object that a +/// variable sized object has been created. This must be created whenever a +/// variable sized object is created, whether or not the index returned is +/// actually used. +/// +int MachineFrameInfo::CreateVariableSizedObject(unsigned Alignment) { + HasVarSizedObjects = true; + Alignment = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Alignment, TFI.getStackAlignment()); + Objects.push_back(StackObject(0, Alignment, 0, false, false, true, 0)); + ensureMaxAlignment(Alignment); + return (int)Objects.size()-NumFixedObjects-1; +} + /// CreateFixedObject - Create a new object at a fixed location on the stack. /// All fixed objects should be created before other objects are created for /// efficiency. By default, fixed objects are immutable. This returns an @@ -459,6 +534,8 @@ int MachineFrameInfo::CreateFixedObject(uint64_t Size, int64_t SPOffset, // object is 16-byte aligned. 
unsigned StackAlign = TFI.getStackAlignment(); unsigned Align = MinAlign(SPOffset, StackAlign); + Align = clampStackAlignment(!TFI.isStackRealignable() || !RealignOption, + Align, TFI.getStackAlignment()); Objects.insert(Objects.begin(), StackObject(Size, Align, SPOffset, Immutable, /*isSS*/ false, /*NeedSP*/ false, @@ -497,6 +574,54 @@ MachineFrameInfo::getPristineRegs(const MachineBasicBlock *MBB) const { return BV; } +unsigned MachineFrameInfo::estimateStackSize(const MachineFunction &MF) const { + const TargetFrameLowering *TFI = MF.getTarget().getFrameLowering(); + const TargetRegisterInfo *RegInfo = MF.getTarget().getRegisterInfo(); + unsigned MaxAlign = getMaxAlignment(); + int Offset = 0; + + // This code is very, very similar to PEI::calculateFrameObjectOffsets(). + // It really should be refactored to share code. Until then, changes + // should keep in mind that there's tight coupling between the two. + + for (int i = getObjectIndexBegin(); i != 0; ++i) { + int FixedOff = -getObjectOffset(i); + if (FixedOff > Offset) Offset = FixedOff; + } + for (unsigned i = 0, e = getObjectIndexEnd(); i != e; ++i) { + if (isDeadObjectIndex(i)) + continue; + Offset += getObjectSize(i); + unsigned Align = getObjectAlignment(i); + // Adjust to alignment boundary + Offset = (Offset+Align-1)/Align*Align; + + MaxAlign = std::max(Align, MaxAlign); + } + + if (adjustsStack() && TFI->hasReservedCallFrame(MF)) + Offset += getMaxCallFrameSize(); + + // Round up the size to a multiple of the alignment. If the function has + // any calls or alloca's, align to the target's StackAlignment value to + // ensure that the callee's frame or the alloca data is suitably aligned; + // otherwise, for leaf functions, align to the TransientStackAlignment + // value. + unsigned StackAlign; + if (adjustsStack() || hasVarSizedObjects() || + (RegInfo->needsStackRealignment(MF) && getObjectIndexEnd() != 0)) + StackAlign = TFI->getStackAlignment(); + else + StackAlign = TFI->getTransientStackAlignment(); + + // If the frame pointer is eliminated, all frame offsets will be relative to + // SP not FP. Align to MaxAlign so this works. 
+ StackAlign = std::max(StackAlign, MaxAlign); + unsigned AlignMask = StackAlign - 1; + Offset = (Offset + AlignMask) & ~uint64_t(AlignMask); + + return (unsigned)Offset; +} void MachineFrameInfo::print(const MachineFunction &MF, raw_ostream &OS) const{ if (Objects.empty()) return; diff --git a/lib/CodeGen/MachineFunctionPass.cpp b/lib/CodeGen/MachineFunctionPass.cpp index e5a491270a8c..674cc80a006c 100644 --- a/lib/CodeGen/MachineFunctionPass.cpp +++ b/lib/CodeGen/MachineFunctionPass.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineFunctionAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" diff --git a/lib/CodeGen/MachineFunctionPrinterPass.cpp b/lib/CodeGen/MachineFunctionPrinterPass.cpp index ed94efb93551..fa9c821b2af7 100644 --- a/lib/CodeGen/MachineFunctionPrinterPass.cpp +++ b/lib/CodeGen/MachineFunctionPrinterPass.cpp @@ -12,11 +12,11 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/SlotIndexes.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" using namespace llvm; diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index ce8d52000b47..32d066894b5b 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -12,15 +12,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/InlineAsm.h" -#include "llvm/LLVMContext.h" -#include "llvm/Metadata.h" -#include "llvm/Module.h" -#include "llvm/Type.h" -#include "llvm/Value.h" +#include "llvm/ADT/FoldingSet.h" +#include "llvm/ADT/Hashing.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" @@ -28,19 +22,24 @@ #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Metadata.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Type.h" +#include "llvm/IR/Value.h" #include "llvm/MC/MCInstrDesc.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/LeakDetector.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/FoldingSet.h" -#include "llvm/ADT/Hashing.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -144,7 +143,7 @@ void MachineOperand::ChangeToRegister(unsigned Reg, bool isDef, bool isImp, // Change this to a register and set the reg#. 
OpKind = MO_Register; SmallContents.RegNo = Reg; - SubReg = 0; + SubReg_TargetFlags = 0; IsDef = isDef; IsImp = isImp; IsKill = isKill; @@ -518,89 +517,50 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineMemOperand &MMO) { // MachineInstr Implementation //===----------------------------------------------------------------------===// -/// MachineInstr ctor - This constructor creates a dummy MachineInstr with -/// MCID NULL and no operands. -MachineInstr::MachineInstr() - : MCID(0), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), - Parent(0) { - // Make sure that we get added to a machine basicblock - LeakDetector::addGarbageObject(this); -} - -void MachineInstr::addImplicitDefUseOperands() { +void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) - addOperand(MachineOperand::CreateReg(*ImpDefs, true, true)); + addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) - addOperand(MachineOperand::CreateReg(*ImpUses, false, true)); + addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); } /// MachineInstr ctor - This constructor creates a MachineInstr and adds the /// implicit operands. It reserves space for the number of operands specified by /// the MCInstrDesc. -MachineInstr::MachineInstr(const MCInstrDesc &tid, const DebugLoc dl, - bool NoImp) - : MCID(&tid), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { - unsigned NumImplicitOps = 0; - if (!NoImp) - NumImplicitOps = MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + MCID->getNumOperands()); +MachineInstr::MachineInstr(MachineFunction &MF, const MCInstrDesc &tid, + const DebugLoc dl, bool NoImp) + : MCID(&tid), Parent(0), Operands(0), NumOperands(0), + Flags(0), AsmPrinterFlags(0), + NumMemRefs(0), MemRefs(0), debugLoc(dl) { + // Reserve space for the expected number of operands. + if (unsigned NumOps = MCID->getNumOperands() + + MCID->getNumImplicitDefs() + MCID->getNumImplicitUses()) { + CapOperands = OperandCapacity::get(NumOps); + Operands = MF.allocateOperandArray(CapOperands); + } + if (!NoImp) - addImplicitDefUseOperands(); - // Make sure that we get added to a machine basicblock - LeakDetector::addGarbageObject(this); -} - -/// MachineInstr ctor - Work exactly the same as the ctor two above, except -/// that the MachineInstr is created and added to the end of the specified -/// basic block. -MachineInstr::MachineInstr(MachineBasicBlock *MBB, const DebugLoc dl, - const MCInstrDesc &tid) - : MCID(&tid), Flags(0), AsmPrinterFlags(0), - NumMemRefs(0), MemRefs(0), Parent(0), debugLoc(dl) { - assert(MBB && "Cannot use inserting ctor with null basic block!"); - unsigned NumImplicitOps = - MCID->getNumImplicitDefs() + MCID->getNumImplicitUses(); - Operands.reserve(NumImplicitOps + MCID->getNumOperands()); - addImplicitDefUseOperands(); - // Make sure that we get added to a machine basicblock - LeakDetector::addGarbageObject(this); - MBB->push_back(this); // Add instruction to end of basic block! 
+ addImplicitDefUseOperands(MF); } /// MachineInstr ctor - Copies MachineInstr arg exactly /// MachineInstr::MachineInstr(MachineFunction &MF, const MachineInstr &MI) - : MCID(&MI.getDesc()), Flags(0), AsmPrinterFlags(0), + : MCID(&MI.getDesc()), Parent(0), Operands(0), NumOperands(0), + Flags(0), AsmPrinterFlags(0), NumMemRefs(MI.NumMemRefs), MemRefs(MI.MemRefs), - Parent(0), debugLoc(MI.getDebugLoc()) { - Operands.reserve(MI.getNumOperands()); + debugLoc(MI.getDebugLoc()) { + CapOperands = OperandCapacity::get(MI.getNumOperands()); + Operands = MF.allocateOperandArray(CapOperands); - // Add operands + // Copy operands. for (unsigned i = 0; i != MI.getNumOperands(); ++i) - addOperand(MI.getOperand(i)); + addOperand(MF, MI.getOperand(i)); - // Copy all the flags. - Flags = MI.Flags; - - // Set parent to null. - Parent = 0; - - LeakDetector::addGarbageObject(this); -} - -MachineInstr::~MachineInstr() { - LeakDetector::removeGarbageObject(this); -#ifndef NDEBUG - for (unsigned i = 0, e = Operands.size(); i != e; ++i) { - assert(Operands[i].ParentMI == this && "ParentMI mismatch!"); - assert((!Operands[i].isReg() || !Operands[i].isOnRegUseList()) && - "Reg operand def/use list corrupted"); - } -#endif + // Copy all the sensible flags. + setFlags(MI.Flags); } /// getRegInfo - If this instruction is embedded into a MachineFunction, @@ -616,7 +576,7 @@ MachineRegisterInfo *MachineInstr::getRegInfo() { /// this instruction from their respective use lists. This requires that the /// operands already be on their use lists. void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) if (Operands[i].isReg()) MRI.removeRegOperandFromUseList(&Operands[i]); } @@ -625,40 +585,65 @@ void MachineInstr::RemoveRegOperandsFromUseLists(MachineRegisterInfo &MRI) { /// this instruction from their respective use lists. This requires that the /// operands not be on their use lists yet. void MachineInstr::AddRegOperandsToUseLists(MachineRegisterInfo &MRI) { - for (unsigned i = 0, e = Operands.size(); i != e; ++i) + for (unsigned i = 0, e = getNumOperands(); i != e; ++i) if (Operands[i].isReg()) MRI.addRegOperandToUseList(&Operands[i]); } +void MachineInstr::addOperand(const MachineOperand &Op) { + MachineBasicBlock *MBB = getParent(); + assert(MBB && "Use MachineInstrBuilder to add operands to dangling instrs"); + MachineFunction *MF = MBB->getParent(); + assert(MF && "Use MachineInstrBuilder to add operands to dangling instrs"); + addOperand(*MF, Op); +} + +/// Move NumOps MachineOperands from Src to Dst, with support for overlapping +/// ranges. If MRI is non-null also update use-def chains. +static void moveOperands(MachineOperand *Dst, MachineOperand *Src, + unsigned NumOps, MachineRegisterInfo *MRI) { + if (MRI) + return MRI->moveOperands(Dst, Src, NumOps); + + // Here it would be convenient to call memmove, so that isn't allowed because + // MachineOperand has a constructor and so isn't a POD type. + if (Dst < Src) + for (unsigned i = 0; i != NumOps; ++i) + new (Dst + i) MachineOperand(Src[i]); + else + for (unsigned i = NumOps; i ; --i) + new (Dst + i - 1) MachineOperand(Src[i - 1]); +} + /// addOperand - Add the specified operand to the instruction. If it is an /// implicit operand, it is added to the end of the operand list. If it is /// an explicit operand it is added at the end of the explicit operand list /// (before the first implicit operand). 
-void MachineInstr::addOperand(const MachineOperand &Op) { +void MachineInstr::addOperand(MachineFunction &MF, const MachineOperand &Op) { assert(MCID && "Cannot add operands before providing an instr descriptor"); - bool isImpReg = Op.isReg() && Op.isImplicit(); - MachineRegisterInfo *RegInfo = getRegInfo(); - // If the Operands backing store is reallocated, all register operands must - // be removed and re-added to RegInfo. It is storing pointers to operands. - bool Reallocate = RegInfo && - !Operands.empty() && Operands.size() == Operands.capacity(); + // Check if we're adding one of our existing operands. + if (&Op >= Operands && &Op < Operands + NumOperands) { + // This is unusual: MI->addOperand(MI->getOperand(i)). + // If adding Op requires reallocating or moving existing operands around, + // the Op reference could go stale. Support it by copying Op. + MachineOperand CopyOp(Op); + return addOperand(MF, CopyOp); + } // Find the insert location for the new operand. Implicit registers go at - // the end, everything goes before the implicit regs. - unsigned OpNo = Operands.size(); - - // Remove all the implicit operands from RegInfo if they need to be shifted. + // the end, everything else goes before the implicit regs. + // // FIXME: Allow mixed explicit and implicit operands on inline asm. // InstrEmitter::EmitSpecialNode() is marking inline asm clobbers as // implicit-defs, but they must not be moved around. See the FIXME in // InstrEmitter.cpp. + unsigned OpNo = getNumOperands(); + bool isImpReg = Op.isReg() && Op.isImplicit(); if (!isImpReg && !isInlineAsm()) { while (OpNo && Operands[OpNo-1].isReg() && Operands[OpNo-1].isImplicit()) { --OpNo; assert(!Operands[OpNo].isTied() && "Cannot move tied operands"); - if (RegInfo) - RegInfo->removeRegOperandFromUseList(&Operands[OpNo]); } } @@ -669,55 +654,56 @@ void MachineInstr::addOperand(const MachineOperand &Op) { OpNo < MCID->getNumOperands()) && "Trying to add an operand to a machine instr that is already done!"); - // All operands from OpNo have been removed from RegInfo. If the Operands - // backing store needs to be reallocated, we also need to remove any other - // register operands. - if (Reallocate) - for (unsigned i = 0; i != OpNo; ++i) - if (Operands[i].isReg()) - RegInfo->removeRegOperandFromUseList(&Operands[i]); - - // Insert the new operand at OpNo. - Operands.insert(Operands.begin() + OpNo, Op); - Operands[OpNo].ParentMI = this; - - // The Operands backing store has now been reallocated, so we can re-add the - // operands before OpNo. - if (Reallocate) - for (unsigned i = 0; i != OpNo; ++i) - if (Operands[i].isReg()) - RegInfo->addRegOperandToUseList(&Operands[i]); - - // When adding a register operand, tell RegInfo about it. - if (Operands[OpNo].isReg()) { + MachineRegisterInfo *MRI = getRegInfo(); + + // Determine if the Operands array needs to be reallocated. + // Save the old capacity and operand array. + OperandCapacity OldCap = CapOperands; + MachineOperand *OldOperands = Operands; + if (!OldOperands || OldCap.getSize() == getNumOperands()) { + CapOperands = OldOperands ? OldCap.getNext() : OldCap.get(1); + Operands = MF.allocateOperandArray(CapOperands); + // Move the operands before the insertion point. + if (OpNo) + moveOperands(Operands, OldOperands, OpNo, MRI); + } + + // Move the operands following the insertion point. + if (OpNo != NumOperands) + moveOperands(Operands + OpNo + 1, OldOperands + OpNo, NumOperands - OpNo, + MRI); + ++NumOperands; + + // Deallocate the old operand array. 
+ if (OldOperands != Operands && OldOperands) + MF.deallocateOperandArray(OldCap, OldOperands); + + // Copy Op into place. It still needs to be inserted into the MRI use lists. + MachineOperand *NewMO = new (Operands + OpNo) MachineOperand(Op); + NewMO->ParentMI = this; + + // When adding a register operand, tell MRI about it. + if (NewMO->isReg()) { // Ensure isOnRegUseList() returns false, regardless of Op's status. - Operands[OpNo].Contents.Reg.Prev = 0; + NewMO->Contents.Reg.Prev = 0; // Ignore existing ties. This is not a property that can be copied. - Operands[OpNo].TiedTo = 0; - // Add the new operand to RegInfo. - if (RegInfo) - RegInfo->addRegOperandToUseList(&Operands[OpNo]); + NewMO->TiedTo = 0; + // Add the new operand to MRI, but only for instructions in an MBB. + if (MRI) + MRI->addRegOperandToUseList(NewMO); // The MCID operand information isn't accurate until we start adding // explicit operands. The implicit operands are added first, then the // explicits are inserted before them. if (!isImpReg) { // Tie uses to defs as indicated in MCInstrDesc. - if (Operands[OpNo].isUse()) { + if (NewMO->isUse()) { int DefIdx = MCID->getOperandConstraint(OpNo, MCOI::TIED_TO); if (DefIdx != -1) tieOperands(DefIdx, OpNo); } // If the register operand is flagged as early, mark the operand as such. if (MCID->getOperandConstraint(OpNo, MCOI::EARLY_CLOBBER) != -1) - Operands[OpNo].setIsEarlyClobber(true); - } - } - - // Re-add all the implicit ops. - if (RegInfo) { - for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) { - assert(Operands[i].isReg() && "Should only be an implicit reg!"); - RegInfo->addRegOperandToUseList(&Operands[i]); + NewMO->setIsEarlyClobber(true); } } } @@ -726,45 +712,27 @@ void MachineInstr::addOperand(const MachineOperand &Op) { /// fewer operand than it started with. /// void MachineInstr::RemoveOperand(unsigned OpNo) { - assert(OpNo < Operands.size() && "Invalid operand number"); + assert(OpNo < getNumOperands() && "Invalid operand number"); untieRegOperand(OpNo); - MachineRegisterInfo *RegInfo = getRegInfo(); - - // Special case removing the last one. - if (OpNo == Operands.size()-1) { - // If needed, remove from the reg def/use list. - if (RegInfo && Operands.back().isReg() && Operands.back().isOnRegUseList()) - RegInfo->removeRegOperandFromUseList(&Operands.back()); - - Operands.pop_back(); - return; - } - - // Otherwise, we are removing an interior operand. If we have reginfo to - // update, remove all operands that will be shifted down from their reg lists, - // move everything down, then re-add them. - if (RegInfo) { - for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { - if (Operands[i].isReg()) - RegInfo->removeRegOperandFromUseList(&Operands[i]); - } - } #ifndef NDEBUG // Moving tied operands would break the ties. - for (unsigned i = OpNo + 1, e = Operands.size(); i != e; ++i) + for (unsigned i = OpNo + 1, e = getNumOperands(); i != e; ++i) if (Operands[i].isReg()) assert(!Operands[i].isTied() && "Cannot move tied operands"); #endif - Operands.erase(Operands.begin()+OpNo); + MachineRegisterInfo *MRI = getRegInfo(); + if (MRI && Operands[OpNo].isReg()) + MRI->removeRegOperandFromUseList(Operands + OpNo); - if (RegInfo) { - for (unsigned i = OpNo, e = Operands.size(); i != e; ++i) { - if (Operands[i].isReg()) - RegInfo->addRegOperandToUseList(&Operands[i]); - } - } + // Don't call the MachineOperand destructor. 
A lot of this code depends on + // MachineOperand having a trivial destructor anyway, and adding a call here + // wouldn't make it 'destructor-correct'. + + if (unsigned N = NumOperands - 1 - OpNo) + moveOperands(Operands + OpNo, Operands + OpNo + 1, N, MRI); + --NumOperands; } /// addMemOperand - Add a MachineMemOperand to the machine instruction. @@ -773,33 +741,30 @@ void MachineInstr::RemoveOperand(unsigned OpNo) { void MachineInstr::addMemOperand(MachineFunction &MF, MachineMemOperand *MO) { mmo_iterator OldMemRefs = MemRefs; - uint16_t OldNumMemRefs = NumMemRefs; + unsigned OldNumMemRefs = NumMemRefs; - uint16_t NewNum = NumMemRefs + 1; + unsigned NewNum = NumMemRefs + 1; mmo_iterator NewMemRefs = MF.allocateMemRefsArray(NewNum); std::copy(OldMemRefs, OldMemRefs + OldNumMemRefs, NewMemRefs); NewMemRefs[NewNum - 1] = MO; - - MemRefs = NewMemRefs; - NumMemRefs = NewNum; + setMemRefs(NewMemRefs, NewMemRefs + NewNum); } bool MachineInstr::hasPropertyInBundle(unsigned Mask, QueryType Type) const { - const MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::const_instr_iterator MII = *this; ++MII; - while (MII != MBB->end() && MII->isInsideBundle()) { + assert(!isBundledWithPred() && "Must be called on bundle header"); + for (MachineBasicBlock::const_instr_iterator MII = this;; ++MII) { if (MII->getDesc().getFlags() & Mask) { if (Type == AnyInBundle) return true; } else { - if (Type == AllInBundle) + if (Type == AllInBundle && !MII->isBundle()) return false; } - ++MII; + // This was the last instruction in the bundle. + if (!MII->isBundledWithSucc()) + return Type == AllInBundle; } - - return Type == AllInBundle; } bool MachineInstr::isIdenticalTo(const MachineInstr *Other, @@ -865,46 +830,25 @@ bool MachineInstr::isIdenticalTo(const MachineInstr *Other, return true; } -/// removeFromParent - This method unlinks 'this' from the containing basic -/// block, and returns it, but does not delete it. MachineInstr *MachineInstr::removeFromParent() { assert(getParent() && "Not embedded in a basic block!"); - - // If it's a bundle then remove the MIs inside the bundle as well. - if (isBundle()) { - MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::instr_iterator MII = *this; ++MII; - MachineBasicBlock::instr_iterator E = MBB->instr_end(); - while (MII != E && MII->isInsideBundle()) { - MachineInstr *MI = &*MII; - ++MII; - MBB->remove(MI); - } - } - getParent()->remove(this); - return this; + return getParent()->remove(this); } +MachineInstr *MachineInstr::removeFromBundle() { + assert(getParent() && "Not embedded in a basic block!"); + return getParent()->remove_instr(this); +} -/// eraseFromParent - This method unlinks 'this' from the containing basic -/// block, and deletes it. void MachineInstr::eraseFromParent() { assert(getParent() && "Not embedded in a basic block!"); - // If it's a bundle then remove the MIs inside the bundle as well. - if (isBundle()) { - MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::instr_iterator MII = *this; ++MII; - MachineBasicBlock::instr_iterator E = MBB->instr_end(); - while (MII != E && MII->isInsideBundle()) { - MachineInstr *MI = &*MII; - ++MII; - MBB->erase(MI); - } - } - // Erase the individual instruction, which may itself be inside a bundle. - getParent()->erase_instr(this); + getParent()->erase(this); } +void MachineInstr::eraseFromBundle() { + assert(getParent() && "Not embedded in a basic block!"); + getParent()->erase_instr(this); +} /// getNumExplicitOperands - Returns the number of non-implicit operands. 
/// @@ -921,14 +865,40 @@ unsigned MachineInstr::getNumExplicitOperands() const { return NumOperands; } -/// isBundled - Return true if this instruction part of a bundle. This is true -/// if either itself or its following instruction is marked "InsideBundle". -bool MachineInstr::isBundled() const { - if (isInsideBundle()) - return true; - MachineBasicBlock::const_instr_iterator nextMI = this; - ++nextMI; - return nextMI != Parent->instr_end() && nextMI->isInsideBundle(); +void MachineInstr::bundleWithPred() { + assert(!isBundledWithPred() && "MI is already bundled with its predecessor"); + setFlag(BundledPred); + MachineBasicBlock::instr_iterator Pred = this; + --Pred; + assert(!Pred->isBundledWithSucc() && "Inconsistent bundle flags"); + Pred->setFlag(BundledSucc); +} + +void MachineInstr::bundleWithSucc() { + assert(!isBundledWithSucc() && "MI is already bundled with its successor"); + setFlag(BundledSucc); + MachineBasicBlock::instr_iterator Succ = this; + ++Succ; + assert(!Succ->isBundledWithPred() && "Inconsistent bundle flags"); + Succ->setFlag(BundledPred); +} + +void MachineInstr::unbundleFromPred() { + assert(isBundledWithPred() && "MI isn't bundled with its predecessor"); + clearFlag(BundledPred); + MachineBasicBlock::instr_iterator Pred = this; + --Pred; + assert(Pred->isBundledWithSucc() && "Inconsistent bundle flags"); + Pred->clearFlag(BundledSucc); +} + +void MachineInstr::unbundleFromSucc() { + assert(isBundledWithSucc() && "MI isn't bundled with its successor"); + clearFlag(BundledSucc); + MachineBasicBlock::instr_iterator Succ = this; + ++Succ; + assert(Succ->isBundledWithPred() && "Inconsistent bundle flags"); + Succ->clearFlag(BundledPred); } bool MachineInstr::isStackAligningInlineAsm() const { @@ -1011,18 +981,13 @@ MachineInstr::getRegClassConstraint(unsigned OpIdx, return NULL; } -/// getBundleSize - Return the number of instructions inside the MI bundle. +/// Return the number of instructions inside the MI bundle, not counting the +/// header instruction. unsigned MachineInstr::getBundleSize() const { - assert(isBundle() && "Expecting a bundle"); - - const MachineBasicBlock *MBB = getParent(); - MachineBasicBlock::const_instr_iterator I = *this, E = MBB->instr_end(); + MachineBasicBlock::const_instr_iterator I = this; unsigned Size = 0; - while ((++I != E) && I->isInsideBundle()) { - ++Size; - } - assert(Size > 1 && "Malformed bundle"); - + while (I->isBundledWithSucc()) + ++Size, ++I; return Size; } @@ -1231,41 +1196,6 @@ void MachineInstr::clearKillInfo() { } } -/// copyKillDeadInfo - Copies kill / dead operand properties from MI. -/// -void MachineInstr::copyKillDeadInfo(const MachineInstr *MI) { - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || (!MO.isKill() && !MO.isDead())) - continue; - for (unsigned j = 0, ee = getNumOperands(); j != ee; ++j) { - MachineOperand &MOp = getOperand(j); - if (!MOp.isIdenticalTo(MO)) - continue; - if (MO.isKill()) - MOp.setIsKill(); - else - MOp.setIsDead(); - break; - } - } -} - -/// copyPredicates - Copies predicate operand(s) from MI. -void MachineInstr::copyPredicates(const MachineInstr *MI) { - assert(!isBundle() && "MachineInstr::copyPredicates() can't handle bundles"); - - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isPredicable()) - return; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (MCID.OpInfo[i].isPredicate()) { - // Predicated operands must be last operands. 
- addOperand(MI->getOperand(i)); - } - } -} - void MachineInstr::substituteRegister(unsigned FromReg, unsigned ToReg, unsigned SubIdx, @@ -1460,12 +1390,13 @@ bool MachineInstr::allDefsAreDead() const { /// copyImplicitOps - Copy implicit register operands from specified /// instruction to this instruction. -void MachineInstr::copyImplicitOps(const MachineInstr *MI) { +void MachineInstr::copyImplicitOps(MachineFunction &MF, + const MachineInstr *MI) { for (unsigned i = MI->getDesc().getNumOperands(), e = MI->getNumOperands(); i != e; ++i) { const MachineOperand &MO = MI->getOperand(i); if (MO.isReg() && MO.isImplicit()) - addOperand(MO); + addOperand(MF, MO); } } @@ -1497,7 +1428,8 @@ static void printDebugLoc(DebugLoc DL, const MachineFunction *MF, } } -void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { +void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM, + bool SkipOpers) const { // We can be a bit tidier if we know the TargetMachine and/or MachineFunction. const MachineFunction *MF = 0; const MachineRegisterInfo *MRI = 0; @@ -1534,6 +1466,9 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { else OS << "UNKNOWN"; + if (SkipOpers) + return; + // Print the rest of the operands. bool OmittedAnyCallClobbers = false; bool FirstOp = true; @@ -1545,10 +1480,14 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { OS << " "; getOperand(InlineAsm::MIOp_AsmString).print(OS, TM); - // Print HasSideEffects, IsAlignStack + // Print HasSideEffects, MayLoad, MayStore, IsAlignStack unsigned ExtraInfo = getOperand(InlineAsm::MIOp_ExtraInfo).getImm(); if (ExtraInfo & InlineAsm::Extra_HasSideEffects) OS << " [sideeffect]"; + if (ExtraInfo & InlineAsm::Extra_MayLoad) + OS << " [mayload]"; + if (ExtraInfo & InlineAsm::Extra_MayStore) + OS << " [maystore]"; if (ExtraInfo & InlineAsm::Extra_IsAlignStack) OS << " [alignstack]"; if (getInlineAsmDialect() == InlineAsm::AD_ATT) @@ -1576,12 +1515,12 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { unsigned Reg = MO.getReg(); if (TargetRegisterInfo::isPhysicalRegister(Reg)) { const MachineRegisterInfo &MRI = MF->getRegInfo(); - if (MRI.use_empty(Reg) && !MRI.isLiveOut(Reg)) { + if (MRI.use_empty(Reg)) { bool HasAliasLive = false; for (MCRegAliasIterator AI(Reg, TM->getRegisterInfo(), true); AI.isValid(); ++AI) { unsigned AliasReg = *AI; - if (!MRI.use_empty(AliasReg) || MRI.isLiveOut(AliasReg)) { + if (!MRI.use_empty(AliasReg)) { HasAliasLive = true; break; } @@ -1653,7 +1592,8 @@ void MachineInstr::print(raw_ostream &OS, const TargetMachine *TM) const { } bool HaveSemi = false; - if (Flags) { + const unsigned PrintableFlags = FrameSetup; + if (Flags & PrintableFlags) { if (!HaveSemi) OS << ";"; HaveSemi = true; OS << " flags: "; diff --git a/lib/CodeGen/MachineInstrBundle.cpp b/lib/CodeGen/MachineInstrBundle.cpp index 1f7fbfc719b0..77bcd1d7c8e3 100644 --- a/lib/CodeGen/MachineInstrBundle.cpp +++ b/lib/CodeGen/MachineInstrBundle.cpp @@ -8,14 +8,14 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineInstrBundle.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include 
"llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" using namespace llvm; namespace { @@ -47,8 +47,8 @@ bool UnpackMachineBundles::runOnMachineFunction(MachineFunction &MF) { // Remove BUNDLE instruction and the InsideBundle flags from bundled // instructions. if (MI->isBundle()) { - while (++MII != MIE && MII->isInsideBundle()) { - MII->setIsInsideBundle(false); + while (++MII != MIE && MII->isBundledWithPred()) { + MII->unbundleFromPred(); for (unsigned i = 0, e = MII->getNumOperands(); i != e; ++i) { MachineOperand &MO = MII->getOperand(i); if (MO.isReg() && MO.isInternalRead()) @@ -101,13 +101,15 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI) { assert(FirstMI != LastMI && "Empty bundle?"); + MIBundleBuilder Bundle(MBB, FirstMI, LastMI); const TargetMachine &TM = MBB.getParent()->getTarget(); const TargetInstrInfo *TII = TM.getInstrInfo(); const TargetRegisterInfo *TRI = TM.getRegisterInfo(); - MachineInstrBuilder MIB = BuildMI(MBB, FirstMI, FirstMI->getDebugLoc(), + MachineInstrBuilder MIB = BuildMI(*MBB.getParent(), FirstMI->getDebugLoc(), TII->get(TargetOpcode::BUNDLE)); + Bundle.prepend(MIB); SmallVector LocalDefs; SmallSet LocalDefSet; @@ -177,7 +179,6 @@ void llvm::finalizeBundle(MachineBasicBlock &MBB, } } - FirstMI->setIsInsideBundle(); Defs.clear(); } @@ -223,14 +224,13 @@ bool llvm::finalizeBundles(MachineFunction &MF) { bool Changed = false; for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { MachineBasicBlock &MBB = *I; - MachineBasicBlock::instr_iterator MII = MBB.instr_begin(); - assert(!MII->isInsideBundle() && - "First instr cannot be inside bundle before finalization!"); - MachineBasicBlock::instr_iterator MIE = MBB.instr_end(); if (MII == MIE) continue; + assert(!MII->isInsideBundle() && + "First instr cannot be inside bundle before finalization!"); + for (++MII; MII != MIE; ) { if (!MII->isInsideBundle()) ++MII; @@ -281,7 +281,7 @@ MachineOperandIteratorBase::PhysRegInfo MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, const TargetRegisterInfo *TRI) { bool AllDefsDead = true; - PhysRegInfo PRI = {false, false, false, false, false, false, false}; + PhysRegInfo PRI = {false, false, false, false, false, false}; assert(TargetRegisterInfo::isPhysicalRegister(Reg) && "analyzePhysReg not given a physical register!"); @@ -305,7 +305,9 @@ MachineOperandIteratorBase::analyzePhysReg(unsigned Reg, // Reg or a super-reg is read, and perhaps killed also. PRI.Reads = true; PRI.Kills = MO.isKill(); - } if (IsRegOrOverlapping && MO.readsReg()) { + } + + if (IsRegOrOverlapping && MO.readsReg()) { PRI.ReadsOverlap = true;// Reg or an overlapping register is read. 
} diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index 169443e03d77..ed3ed4d4d916 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -22,6 +22,10 @@ #define DEBUG_TYPE "machine-licm" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" @@ -29,17 +33,13 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt @@ -62,7 +62,7 @@ namespace { class MachineLICM : public MachineFunctionPass { const TargetMachine *TM; const TargetInstrInfo *TII; - const TargetLowering *TLI; + const TargetLoweringBase *TLI; const TargetRegisterInfo *TRI; const MachineFrameInfo *MFI; MachineRegisterInfo *MRI; @@ -780,7 +780,7 @@ MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI, unsigned Reg, unsigned OpIdx, unsigned &RCId, unsigned &RCCost) const { const TargetRegisterClass *RC = MRI->getRegClass(Reg); - EVT VT = *RC->vt_begin(); + MVT VT = *RC->vt_begin(); if (VT == MVT::Untyped) { RCId = RC->getID(); RCCost = 1; diff --git a/lib/CodeGen/MachineLoopInfo.cpp b/lib/CodeGen/MachineLoopInfo.cpp index 27afeec1d973..4e2cfdc4e568 100644 --- a/lib/CodeGen/MachineLoopInfo.cpp +++ b/lib/CodeGen/MachineLoopInfo.cpp @@ -15,9 +15,9 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Analysis/LoopInfoImpl.h" #include "llvm/Support/Debug.h" using namespace llvm; diff --git a/lib/CodeGen/MachineLoopRanges.cpp b/lib/CodeGen/MachineLoopRanges.cpp deleted file mode 100644 index 17fe67f65045..000000000000 --- a/lib/CodeGen/MachineLoopRanges.cpp +++ /dev/null @@ -1,116 +0,0 @@ -//===- MachineLoopRanges.cpp - Ranges of machine loops --------------------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file provides the implementation of the MachineLoopRanges analysis. 
-// -//===----------------------------------------------------------------------===// - -#include "llvm/CodeGen/MachineLoopRanges.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/Passes.h" - -using namespace llvm; - -char MachineLoopRanges::ID = 0; -INITIALIZE_PASS_BEGIN(MachineLoopRanges, "machine-loop-ranges", - "Machine Loop Ranges", true, true) -INITIALIZE_PASS_DEPENDENCY(SlotIndexes) -INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) -INITIALIZE_PASS_END(MachineLoopRanges, "machine-loop-ranges", - "Machine Loop Ranges", true, true) - -char &llvm::MachineLoopRangesID = MachineLoopRanges::ID; - -void MachineLoopRanges::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - AU.addRequiredTransitive(); - AU.addRequiredTransitive(); - MachineFunctionPass::getAnalysisUsage(AU); -} - -/// runOnMachineFunction - Don't do much, loop ranges are computed on demand. -bool MachineLoopRanges::runOnMachineFunction(MachineFunction &) { - releaseMemory(); - Indexes = &getAnalysis(); - return false; -} - -void MachineLoopRanges::releaseMemory() { - DeleteContainerSeconds(Cache); - Cache.clear(); -} - -MachineLoopRange *MachineLoopRanges::getLoopRange(const MachineLoop *Loop) { - MachineLoopRange *&Range = Cache[Loop]; - if (!Range) - Range = new MachineLoopRange(Loop, Allocator, *Indexes); - return Range; -} - -/// Create a MachineLoopRange, only accessible to MachineLoopRanges. -MachineLoopRange::MachineLoopRange(const MachineLoop *loop, - MachineLoopRange::Allocator &alloc, - SlotIndexes &Indexes) - : Loop(loop), Intervals(alloc), Area(0) { - // Compute loop coverage. - for (MachineLoop::block_iterator I = Loop->block_begin(), - E = Loop->block_end(); I != E; ++I) { - const std::pair &Range = Indexes.getMBBRange(*I); - Intervals.insert(Range.first, Range.second, 1u); - Area += Range.first.distance(Range.second); - } -} - -/// overlaps - Return true if this loop overlaps the given range of machine -/// instructions. -bool MachineLoopRange::overlaps(SlotIndex Start, SlotIndex Stop) { - Map::const_iterator I = Intervals.find(Start); - return I.valid() && Stop > I.start(); -} - -unsigned MachineLoopRange::getNumber() const { - return Loop->getHeader()->getNumber(); -} - -/// byNumber - Comparator for array_pod_sort that sorts a list of -/// MachineLoopRange pointers by number. -int MachineLoopRange::byNumber(const void *pa, const void *pb) { - const MachineLoopRange *a = *static_cast(pa); - const MachineLoopRange *b = *static_cast(pb); - unsigned na = a->getNumber(); - unsigned nb = b->getNumber(); - if (na < nb) - return -1; - if (na > nb) - return 1; - return 0; -} - -/// byAreaDesc - Comparator for array_pod_sort that sorts a list of -/// MachineLoopRange pointers by: -/// 1. Descending area. -/// 2. Ascending number. -int MachineLoopRange::byAreaDesc(const void *pa, const void *pb) { - const MachineLoopRange *a = *static_cast(pa); - const MachineLoopRange *b = *static_cast(pb); - if (a->getArea() != b->getArea()) - return a->getArea() > b->getArea() ? 
-1 : 1; - return byNumber(pa, pb); -} - -void MachineLoopRange::print(raw_ostream &OS) const { - OS << "Loop#" << getNumber() << " ="; - for (Map::const_iterator I = Intervals.begin(); I.valid(); ++I) - OS << " [" << I.start() << ';' << I.stop() << ')'; -} - -raw_ostream &llvm::operator<<(raw_ostream &OS, const MachineLoopRange &MLR) { - MLR.print(OS); - return OS; -} diff --git a/lib/CodeGen/MachineModuleInfo.cpp b/lib/CodeGen/MachineModuleInfo.cpp index 005bf783e3da..0ea9ae0fcc89 100644 --- a/lib/CodeGen/MachineModuleInfo.cpp +++ b/lib/CodeGen/MachineModuleInfo.cpp @@ -8,18 +8,17 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineModuleInfo.h" - -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Module.h" +#include "llvm/ADT/PointerUnion.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCObjectFileInfo.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/ADT/PointerUnion.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" using namespace llvm; @@ -254,15 +253,8 @@ void MMIAddrLabelMapCallbackPtr::allUsesReplacedWith(Value *V2) { MachineModuleInfo::MachineModuleInfo(const MCAsmInfo &MAI, const MCRegisterInfo &MRI, const MCObjectFileInfo *MOFI) - : ImmutablePass(ID), Context(MAI, MRI, MOFI), - ObjFileMMI(0), CompactUnwindEncoding(0), CurCallSite(0), CallsEHReturn(0), - CallsUnwindInit(0), DbgInfoAvailable(false), - UsesVAFloatArgument(false) { + : ImmutablePass(ID), Context(MAI, MRI, MOFI, 0, false) { initializeMachineModuleInfoPass(*PassRegistry::getPassRegistry()); - // Always emit some info, by default "no personality" info. - Personalities.push_back(NULL); - AddrLabelSymbols = 0; - TheModule = 0; } MachineModuleInfo::MachineModuleInfo() @@ -274,26 +266,36 @@ MachineModuleInfo::MachineModuleInfo() } MachineModuleInfo::~MachineModuleInfo() { - delete ObjFileMMI; +} - // FIXME: Why isn't doFinalization being called?? - //assert(AddrLabelSymbols == 0 && "doFinalization not called"); - delete AddrLabelSymbols; +bool MachineModuleInfo::doInitialization(Module &M) { + + ObjFileMMI = 0; + CompactUnwindEncoding = 0; + CurCallSite = 0; + CallsEHReturn = 0; + CallsUnwindInit = 0; + DbgInfoAvailable = UsesVAFloatArgument = false; + // Always emit some info, by default "no personality" info. + Personalities.push_back(NULL); AddrLabelSymbols = 0; -} + TheModule = 0; -/// doInitialization - Initialize the state for a new module. -/// -bool MachineModuleInfo::doInitialization() { - assert(AddrLabelSymbols == 0 && "Improperly initialized"); return false; } -/// doFinalization - Tear down the state after completion of a module. 
-/// -bool MachineModuleInfo::doFinalization() { +bool MachineModuleInfo::doFinalization(Module &M) { + + Personalities.clear(); + delete AddrLabelSymbols; AddrLabelSymbols = 0; + + Context.reset(); + + delete ObjFileMMI; + ObjFileMMI = 0; + return false; } diff --git a/lib/CodeGen/MachineRegisterInfo.cpp b/lib/CodeGen/MachineRegisterInfo.cpp index 95d7a7dd6897..1af00e84a6ed 100644 --- a/lib/CodeGen/MachineRegisterInfo.cpp +++ b/lib/CodeGen/MachineRegisterInfo.cpp @@ -30,12 +30,6 @@ MachineRegisterInfo::MachineRegisterInfo(const TargetRegisterInfo &TRI) } MachineRegisterInfo::~MachineRegisterInfo() { -#ifndef NDEBUG - clearVirtRegs(); - for (unsigned i = 0, e = TRI->getNumRegs(); i != e; ++i) - assert(!PhysRegUseDefLists[i] && - "PhysRegUseDefLists has entries after all instructions are deleted"); -#endif delete [] PhysRegUseDefLists; } @@ -43,6 +37,7 @@ MachineRegisterInfo::~MachineRegisterInfo() { /// void MachineRegisterInfo::setRegClass(unsigned Reg, const TargetRegisterClass *RC) { + assert(RC && RC->isAllocatable() && "Invalid RC for virtual register"); VRegInfo[Reg].first = RC; } @@ -180,6 +175,55 @@ void MachineRegisterInfo::removeRegOperandFromUseList(MachineOperand *MO) { MO->Contents.Reg.Next = 0; } +/// Move NumOps operands from Src to Dst, updating use-def lists as needed. +/// +/// The Dst range is assumed to be uninitialized memory. (Or it may contain +/// operands that won't be destroyed, which is OK because the MO destructor is +/// trivial anyway). +/// +/// The Src and Dst ranges may overlap. +void MachineRegisterInfo::moveOperands(MachineOperand *Dst, + MachineOperand *Src, + unsigned NumOps) { + assert(Src != Dst && NumOps && "Noop moveOperands"); + + // Copy backwards if Dst is within the Src range. + int Stride = 1; + if (Dst >= Src && Dst < Src + NumOps) { + Stride = -1; + Dst += NumOps - 1; + Src += NumOps - 1; + } + + // Copy one operand at a time. + do { + new (Dst) MachineOperand(*Src); + + // Dst takes Src's place in the use-def chain. + if (Src->isReg()) { + MachineOperand *&Head = getRegUseDefListHead(Src->getReg()); + MachineOperand *Prev = Src->Contents.Reg.Prev; + MachineOperand *Next = Src->Contents.Reg.Next; + assert(Head && "List empty, but operand is chained"); + assert(Prev && "Operand was not on use-def list"); + + // Prev links are circular, next link is NULL instead of looping back to + // Head. + if (Src == Head) + Head = Dst; + else + Prev->Contents.Reg.Next = Dst; + + // Update Prev pointer. This also works when Src was pointing to itself + // in a 1-element list. In that case Head == Dst. + (Next ? Next : Head)->Contents.Reg.Prev = Dst; + } + + Dst += Stride; + Src += Stride; + } while (--NumOps); +} + /// replaceRegWith - Replace all instances of FromReg with ToReg in the /// machine function. This is like llvm-level X->replaceAllUsesWith(Y), /// except that it also changes any definitions of the register as well. @@ -240,13 +284,6 @@ bool MachineRegisterInfo::isLiveIn(unsigned Reg) const { return false; } -bool MachineRegisterInfo::isLiveOut(unsigned Reg) const { - for (liveout_iterator I = liveout_begin(), E = liveout_end(); I != E; ++I) - if (*I == Reg) - return true; - return false; -} - /// getLiveInPhysReg - If VReg is a live-in virtual register, return the /// corresponding live-in physical register. 
unsigned MachineRegisterInfo::getLiveInPhysReg(unsigned VReg) const { diff --git a/lib/CodeGen/MachineSSAUpdater.cpp b/lib/CodeGen/MachineSSAUpdater.cpp index 076547a5ed87..bb6aad7f948e 100644 --- a/lib/CodeGen/MachineSSAUpdater.cpp +++ b/lib/CodeGen/MachineSSAUpdater.cpp @@ -13,19 +13,19 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/MachineSSAUpdater.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/AlignOf.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Transforms/Utils/SSAUpdaterImpl.h" using namespace llvm; @@ -109,7 +109,7 @@ unsigned LookForIdenticalPHI(MachineBasicBlock *BB, /// a value of the given register class at the start of the specified basic /// block. It returns the virtual register defined by the instruction. static -MachineInstr *InsertNewDef(unsigned Opcode, +MachineInstrBuilder InsertNewDef(unsigned Opcode, MachineBasicBlock *BB, MachineBasicBlock::iterator I, const TargetRegisterClass *RC, MachineRegisterInfo *MRI, @@ -183,13 +183,12 @@ unsigned MachineSSAUpdater::GetValueInMiddleOfBlock(MachineBasicBlock *BB) { // Otherwise, we do need a PHI: insert one now. MachineBasicBlock::iterator Loc = BB->empty() ? BB->end() : BB->begin(); - MachineInstr *InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, - Loc, VRC, MRI, TII); + MachineInstrBuilder InsertedPHI = InsertNewDef(TargetOpcode::PHI, BB, + Loc, VRC, MRI, TII); // Fill in all the predecessors of the PHI. - MachineInstrBuilder MIB(InsertedPHI); for (unsigned i = 0, e = PredValues.size(); i != e; ++i) - MIB.addReg(PredValues[i].second).addMBB(PredValues[i].first); + InsertedPHI.addReg(PredValues[i].second).addMBB(PredValues[i].first); // See if the PHI node can be merged to a single value. This can happen in // loop cases when we get a PHI of itself and one other value. @@ -316,8 +315,7 @@ public: /// the specified predecessor block. static void AddPHIOperand(MachineInstr *PHI, unsigned Val, MachineBasicBlock *Pred) { - PHI->addOperand(MachineOperand::CreateReg(Val, false)); - PHI->addOperand(MachineOperand::CreateMBB(Pred)); + MachineInstrBuilder(*Pred->getParent(), PHI).addReg(Val).addMBB(Pred); } /// InstrIsPHI - Check if an instruction is a PHI. 
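The MachineSSAUpdater hunks above follow the same API change threaded through this import: free-standing instructions now receive operands via MachineInstr::addOperand(MachineFunction&, ...), so callers build them through MachineInstrBuilder / BuildMI rather than pushing MachineOperands directly. The following is a minimal sketch of that usage pattern, not part of the patch; the function name insertPHI and the parameters MBB, TII, DestReg and PredValues are placeholder names for illustration only.

#include "llvm/ADT/ArrayRef.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/Target/TargetInstrInfo.h"
using namespace llvm;

// Sketch: insert a PHI at the top of MBB and append (value, predecessor)
// operand pairs, mirroring the updated MachineSSAUpdater code path where
// every operand is routed through the owning MachineFunction by the builder.
static MachineInstrBuilder
insertPHI(MachineBasicBlock *MBB, const TargetInstrInfo *TII, unsigned DestReg,
          ArrayRef<std::pair<MachineBasicBlock *, unsigned> > PredValues) {
  MachineBasicBlock::iterator Loc = MBB->empty() ? MBB->end() : MBB->begin();
  MachineInstrBuilder PHI =
      BuildMI(*MBB, Loc, DebugLoc(), TII->get(TargetOpcode::PHI), DestReg);
  for (unsigned i = 0, e = PredValues.size(); i != e; ++i)
    PHI.addReg(PredValues[i].second).addMBB(PredValues[i].first);
  return PHI;
}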
diff --git a/lib/CodeGen/MachineScheduler.cpp b/lib/CodeGen/MachineScheduler.cpp index a4817d09c0d3..5bd2349b50f6 100644 --- a/lib/CodeGen/MachineScheduler.cpp +++ b/lib/CodeGen/MachineScheduler.cpp @@ -14,20 +14,22 @@ #define DEBUG_TYPE "misched" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineScheduler.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/PriorityQueue.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/ScheduleDAGILP.h" +#include "llvm/CodeGen/ScheduleDFS.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/PriorityQueue.h" - #include using namespace llvm; @@ -49,14 +51,19 @@ static cl::opt MISchedCutoff("misched-cutoff", cl::Hidden, static bool ViewMISchedDAGs = false; #endif // NDEBUG -// Threshold to very roughly model an out-of-order processor's instruction -// buffers. If the actual value of this threshold matters much in practice, then -// it can be specified by the machine model. For now, it's an experimental -// tuning knob to determine when and if it matters. -static cl::opt ILPWindow("ilp-window", cl::Hidden, - cl::desc("Allow expected latency to exceed the critical path by N cycles " - "before attempting to balance ILP"), - cl::init(10U)); +// Experimental heuristics +static cl::opt EnableLoadCluster("misched-cluster", cl::Hidden, + cl::desc("Enable load clustering."), cl::init(true)); + +// Experimental heuristics +static cl::opt EnableMacroFusion("misched-fusion", cl::Hidden, + cl::desc("Enable scheduling for macro fusion."), cl::init(true)); + +static cl::opt VerifyScheduling("verify-misched", cl::Hidden, + cl::desc("Verify machine instrs before and after machine scheduling")); + +// DAG subtrees must have at least this many nodes. +static const unsigned MinSubtreeSize = 8; //===----------------------------------------------------------------------===// // Machine Instruction Scheduling Pass and Registry @@ -195,6 +202,10 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { LIS = &getAnalysis(); const TargetInstrInfo *TII = MF->getTarget().getInstrInfo(); + if (VerifyScheduling) { + DEBUG(LIS->print(dbgs())); + MF->verify(this, "Before machine scheduling."); + } RegClassInfo->runOnMachineFunction(*MF); // Select the scheduler, or set the default. 
@@ -261,7 +272,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } DEBUG(dbgs() << "********** MI Scheduling **********\n"); DEBUG(dbgs() << MF->getName() - << ":BB#" << MBB->getNumber() << "\n From: " << *I << " To: "; + << ":BB#" << MBB->getNumber() << " " << MBB->getName() + << "\n From: " << *I << " To: "; if (RegionEnd != MBB->end()) dbgs() << *RegionEnd; else dbgs() << "End"; dbgs() << " Remaining: " << RemainingInstrs << "\n"); @@ -282,6 +294,8 @@ bool MachineScheduler::runOnMachineFunction(MachineFunction &mf) { } Scheduler->finalizeSchedule(); DEBUG(LIS->print(dbgs())); + if (VerifyScheduling) + MF->verify(this, "After machine scheduling."); return true; } @@ -291,7 +305,7 @@ void MachineScheduler::print(raw_ostream &O, const Module* m) const { #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ReadyQueue::dump() { - dbgs() << Name << ": "; + dbgs() << " " << Name << ": "; for (unsigned i = 0, e = Queue.size(); i < e; ++i) dbgs() << Queue[i]->NodeNum << " "; dbgs() << "\n"; @@ -303,6 +317,25 @@ void ReadyQueue::dump() { // preservation. //===----------------------------------------------------------------------===// +ScheduleDAGMI::~ScheduleDAGMI() { + delete DFSResult; + DeleteContainerPointers(Mutations); + delete SchedImpl; +} + +bool ScheduleDAGMI::addEdge(SUnit *SuccSU, const SDep &PredDep) { + if (SuccSU != &ExitSU) { + // Do not use WillCreateCycle, it assumes SD scheduling. + // If Pred is reachable from Succ, then the edge creates a cycle. + if (Topo.IsReachable(PredDep.getSUnit(), SuccSU)) + return false; + Topo.AddPred(SuccSU, PredDep.getSUnit()); + } + SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial()); + // Return true regardless of whether a new edge needed to be inserted. + return true; +} + /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. When /// NumPredsLeft reaches zero, release the successor node. /// @@ -310,6 +343,12 @@ void ReadyQueue::dump() { void ScheduleDAGMI::releaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); + if (SuccEdge->isWeak()) { + --SuccSU->WeakPredsLeft; + if (SuccEdge->isCluster()) + NextClusterSucc = SuccSU; + return; + } #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; @@ -338,6 +377,12 @@ void ScheduleDAGMI::releaseSuccessors(SUnit *SU) { void ScheduleDAGMI::releasePred(SUnit *SU, SDep *PredEdge) { SUnit *PredSU = PredEdge->getSUnit(); + if (PredEdge->isWeak()) { + --PredSU->WeakSuccsLeft; + if (PredEdge->isCluster()) + NextClusterPred = PredSU; + return; + } #ifndef NDEBUG if (PredSU->NumSuccsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; @@ -433,7 +478,8 @@ void ScheduleDAGMI::initRegPressure() { // Cache the list of excess pressure sets in this region. This will also track // the max pressure in the scheduled code for these sets. RegionCriticalPSets.clear(); - std::vector RegionPressure = RPTracker.getPressure().MaxSetPressure; + const std::vector &RegionPressure = + RPTracker.getPressure().MaxSetPressure; for (unsigned i = 0, e = RegionPressure.size(); i < e; ++i) { unsigned Limit = TRI->getRegPressureSetLimit(i); DEBUG(dbgs() << TRI->getRegPressureSetName(i) @@ -452,7 +498,7 @@ void ScheduleDAGMI::initRegPressure() { // FIXME: When the pressure tracker deals in pressure differences then we won't // iterate over all RegionCriticalPSets[i]. 
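The new ScheduleDAGMI::addEdge() only inserts a cluster edge when it cannot close a cycle, which it checks with a reachability query on the topological order. A standalone sketch of that guard on a toy adjacency-list DAG (plain ints and a recursive DFS stand in for SUnits and Topo.IsReachable; illustrative only):

  #include <cstdio>
  #include <vector>

  // Standalone analogue of ScheduleDAGMI::addEdge: only add Pred -> Succ if
  // Pred is not already reachable from Succ, i.e. the new edge cannot close
  // a cycle.
  struct DAG {
    std::vector<std::vector<int>> Succs;
    explicit DAG(int N) : Succs(N) {}

    bool reachable(int From, int To) const {
      if (From == To) return true;
      for (int S : Succs[From])
        if (reachable(S, To)) return true;
      return false;
    }

    bool addEdge(int Pred, int Succ) {
      if (reachable(Succ, Pred)) return false;  // would create a cycle
      Succs[Pred].push_back(Succ);
      return true;
    }
  };

  int main() {
    DAG G(3);
    G.addEdge(0, 1);
    G.addEdge(1, 2);
    std::printf("cluster edge 0->2: %s\n", G.addEdge(0, 2) ? "added" : "rejected");
    std::printf("cluster edge 2->0: %s\n", G.addEdge(2, 0) ? "added" : "rejected");
    return 0;
  }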
void ScheduleDAGMI:: -updateScheduledPressure(std::vector NewMaxPressure) { +updateScheduledPressure(const std::vector &NewMaxPressure) { for (unsigned i = 0, e = RegionCriticalPSets.size(); i < e; ++i) { unsigned ID = RegionCriticalPSets[i].PSetID; int &MaxUnits = RegionCriticalPSets[i].UnitIncrease; @@ -474,14 +520,23 @@ updateScheduledPressure(std::vector NewMaxPressure) { void ScheduleDAGMI::schedule() { buildDAGWithRegPressure(); + Topo.InitDAGTopologicalSorting(); + postprocessDAG(); + SmallVector TopRoots, BotRoots; + findRootsAndBiasEdges(TopRoots, BotRoots); + + // Initialize the strategy before modifying the DAG. + // This may initialize a DFSResult to be used for queue priority. + SchedImpl->initialize(this); + DEBUG(for (unsigned su = 0, e = SUnits.size(); su != e; ++su) SUnits[su].dumpAll(this)); - if (ViewMISchedDAGs) viewGraph(); - initQueues(); + // Initialize ready queues now that the DAG and priority data are finalized. + initQueues(TopRoots, BotRoots); bool IsTopNode = false; while (SUnit *SU = SchedImpl->pickNode(IsTopNode)) { @@ -498,7 +553,7 @@ void ScheduleDAGMI::schedule() { placeDebugValues(); DEBUG({ - unsigned BBNum = top()->getParent()->getNumber(); + unsigned BBNum = begin()->getParent()->getNumber(); dbgs() << "*** Final schedule for BB#" << BBNum << " ***\n"; dumpSchedule(); dbgs() << '\n'; @@ -516,7 +571,6 @@ void ScheduleDAGMI::buildDAGWithRegPressure() { // Build the DAG, and compute current register pressure. buildSchedGraph(AA, &RPTracker); - if (ViewMISchedDAGs) viewGraph(); // Initialize top/bottom trackers after computing region pressure. initRegPressure(); @@ -529,42 +583,67 @@ void ScheduleDAGMI::postprocessDAG() { } } -// Release all DAG roots for scheduling. -void ScheduleDAGMI::releaseRoots() { - SmallVector BotRoots; +void ScheduleDAGMI::computeDFSResult() { + if (!DFSResult) + DFSResult = new SchedDFSResult(/*BottomU*/true, MinSubtreeSize); + DFSResult->clear(); + ScheduledTrees.clear(); + DFSResult->resize(SUnits.size()); + DFSResult->compute(SUnits); + ScheduledTrees.resize(DFSResult->getNumSubtrees()); +} +void ScheduleDAGMI::findRootsAndBiasEdges(SmallVectorImpl &TopRoots, + SmallVectorImpl &BotRoots) { for (std::vector::iterator I = SUnits.begin(), E = SUnits.end(); I != E; ++I) { + SUnit *SU = &(*I); + assert(!SU->isBoundaryNode() && "Boundary node should not be in SUnits"); + + // Order predecessors so DFSResult follows the critical path. + SU->biasCriticalPath(); + // A SUnit is ready to top schedule if it has no predecessors. - if (I->Preds.empty()) - SchedImpl->releaseTopNode(&(*I)); + if (!I->NumPredsLeft) + TopRoots.push_back(SU); // A SUnit is ready to bottom schedule if it has no successors. - if (I->Succs.empty()) - BotRoots.push_back(&(*I)); + if (!I->NumSuccsLeft) + BotRoots.push_back(SU); } - // Release bottom roots in reverse order so the higher priority nodes appear - // first. This is more natural and slightly more efficient. - for (SmallVectorImpl::const_reverse_iterator - I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) - SchedImpl->releaseBottomNode(*I); + ExitSU.biasCriticalPath(); } /// Identify DAG roots and setup scheduler queues. -void ScheduleDAGMI::initQueues() { +void ScheduleDAGMI::initQueues(ArrayRef TopRoots, + ArrayRef BotRoots) { + NextClusterSucc = NULL; + NextClusterPred = NULL; - // Initialize the strategy before modifying the DAG. - SchedImpl->initialize(this); + // Release all DAG roots for scheduling, not including EntrySU/ExitSU. + // + // Nodes with unreleased weak edges can still be roots. 
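findRootsAndBiasEdges() now keys roots off NumPredsLeft/NumSuccsLeft rather than empty edge lists, so, as the comment below notes, nodes whose only remaining edges are weak can still become roots. A standalone sketch of the root collection, with each SUnit reduced to two counters (illustrative only):

  #include <cstdio>
  #include <vector>

  // A node with no unscheduled (strong) predecessors is a top root; one with
  // no unscheduled successors is a bottom root.
  struct SUnitLite {
    unsigned NumPredsLeft;
    unsigned NumSuccsLeft;
  };

  static void findRoots(const std::vector<SUnitLite> &SUnits,
                        std::vector<unsigned> &TopRoots,
                        std::vector<unsigned> &BotRoots) {
    for (unsigned I = 0, E = SUnits.size(); I != E; ++I) {
      if (!SUnits[I].NumPredsLeft) TopRoots.push_back(I);
      if (!SUnits[I].NumSuccsLeft) BotRoots.push_back(I);
    }
  }

  int main() {
    // A tiny three-node chain: 0 -> 1 -> 2.
    std::vector<SUnitLite> SUnits = {{0, 1}, {1, 1}, {1, 0}};
    std::vector<unsigned> TopRoots, BotRoots;
    findRoots(SUnits, TopRoots, BotRoots);
    std::printf("top roots: %zu, bottom roots: %zu\n",
                TopRoots.size(), BotRoots.size());   // 1 and 1
    return 0;
  }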
+ // Release top roots in forward order. + for (SmallVectorImpl::const_iterator + I = TopRoots.begin(), E = TopRoots.end(); I != E; ++I) { + SchedImpl->releaseTopNode(*I); + } + // Release bottom roots in reverse order so the higher priority nodes appear + // first. This is more natural and slightly more efficient. + for (SmallVectorImpl::const_reverse_iterator + I = BotRoots.rbegin(), E = BotRoots.rend(); I != E; ++I) { + SchedImpl->releaseBottomNode(*I); + } - // Release edges from the special Entry node or to the special Exit node. releaseSuccessors(&EntrySU); releasePredecessors(&ExitSU); - // Release all DAG roots for scheduling. - releaseRoots(); - SchedImpl->registerRoots(); + // Advance past initial DebugValues. + assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker"); CurrentTop = nextIfDebug(RegionBegin, RegionEnd); + TopRPTracker.setPos(CurrentTop); + CurrentBottom = RegionEnd; } @@ -618,6 +697,15 @@ void ScheduleDAGMI::updateQueues(SUnit *SU, bool IsTopNode) { SU->isScheduled = true; + if (DFSResult) { + unsigned SubtreeID = DFSResult->getSubtreeID(SU); + if (!ScheduledTrees.test(SubtreeID)) { + ScheduledTrees.set(SubtreeID); + DFSResult->scheduleTree(SubtreeID); + SchedImpl->scheduleTree(SubtreeID); + } + } + // Notify the scheduling strategy after updating the DAG. SchedImpl->schedNode(SU, IsTopNode); } @@ -635,6 +723,8 @@ void ScheduleDAGMI::placeDebugValues() { std::pair P = *prior(DI); MachineInstr *DbgValue = P.first; MachineBasicBlock::iterator OrigPrevMI = P.second; + if (&*RegionBegin == DbgValue) + ++RegionBegin; BB->splice(++OrigPrevMI, BB, DbgValue); if (OrigPrevMI == llvm::prior(RegionEnd)) RegionEnd = DbgValue; @@ -654,6 +744,166 @@ void ScheduleDAGMI::dumpSchedule() const { } #endif +//===----------------------------------------------------------------------===// +// LoadClusterMutation - DAG post-processing to cluster loads. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create cluster edges between neighboring +/// loads. 
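The updateQueues() hunk marks a DFS subtree as scheduled the first time any of its nodes is picked and notifies the strategy exactly once. A standalone sketch of that first-visit bookkeeping, with a plain vector<bool> standing in for the ScheduledTrees BitVector (illustrative only):

  #include <cstdio>
  #include <vector>

  // The first time any node of a DFS subtree is scheduled, the whole subtree
  // is marked and the strategy is notified exactly once.
  struct TreeNotifier {
    std::vector<bool> ScheduledTrees;

    explicit TreeNotifier(unsigned NumSubtrees) : ScheduledTrees(NumSubtrees) {}

    void nodeScheduled(unsigned SubtreeID) {
      if (!ScheduledTrees[SubtreeID]) {
        ScheduledTrees[SubtreeID] = true;
        std::printf("first node of subtree %u scheduled\n", SubtreeID);
      }
    }
  };

  int main() {
    TreeNotifier N(2);
    N.nodeScheduled(0);   // notifies
    N.nodeScheduled(0);   // silent: subtree already marked
    N.nodeScheduled(1);   // notifies
    return 0;
  }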
+class LoadClusterMutation : public ScheduleDAGMutation { + struct LoadInfo { + SUnit *SU; + unsigned BaseReg; + unsigned Offset; + LoadInfo(SUnit *su, unsigned reg, unsigned ofs) + : SU(su), BaseReg(reg), Offset(ofs) {} + }; + static bool LoadInfoLess(const LoadClusterMutation::LoadInfo &LHS, + const LoadClusterMutation::LoadInfo &RHS); + + const TargetInstrInfo *TII; + const TargetRegisterInfo *TRI; +public: + LoadClusterMutation(const TargetInstrInfo *tii, + const TargetRegisterInfo *tri) + : TII(tii), TRI(tri) {} + + virtual void apply(ScheduleDAGMI *DAG); +protected: + void clusterNeighboringLoads(ArrayRef Loads, ScheduleDAGMI *DAG); +}; +} // anonymous + +bool LoadClusterMutation::LoadInfoLess( + const LoadClusterMutation::LoadInfo &LHS, + const LoadClusterMutation::LoadInfo &RHS) { + if (LHS.BaseReg != RHS.BaseReg) + return LHS.BaseReg < RHS.BaseReg; + return LHS.Offset < RHS.Offset; +} + +void LoadClusterMutation::clusterNeighboringLoads(ArrayRef Loads, + ScheduleDAGMI *DAG) { + SmallVector LoadRecords; + for (unsigned Idx = 0, End = Loads.size(); Idx != End; ++Idx) { + SUnit *SU = Loads[Idx]; + unsigned BaseReg; + unsigned Offset; + if (TII->getLdStBaseRegImmOfs(SU->getInstr(), BaseReg, Offset, TRI)) + LoadRecords.push_back(LoadInfo(SU, BaseReg, Offset)); + } + if (LoadRecords.size() < 2) + return; + std::sort(LoadRecords.begin(), LoadRecords.end(), LoadInfoLess); + unsigned ClusterLength = 1; + for (unsigned Idx = 0, End = LoadRecords.size(); Idx < (End - 1); ++Idx) { + if (LoadRecords[Idx].BaseReg != LoadRecords[Idx+1].BaseReg) { + ClusterLength = 1; + continue; + } + + SUnit *SUa = LoadRecords[Idx].SU; + SUnit *SUb = LoadRecords[Idx+1].SU; + if (TII->shouldClusterLoads(SUa->getInstr(), SUb->getInstr(), ClusterLength) + && DAG->addEdge(SUb, SDep(SUa, SDep::Cluster))) { + + DEBUG(dbgs() << "Cluster loads SU(" << SUa->NodeNum << ") - SU(" + << SUb->NodeNum << ")\n"); + // Copy successor edges from SUa to SUb. Interleaving computation + // dependent on SUa can prevent load combining due to register reuse. + // Predecessor edges do not need to be copied from SUb to SUa since nearby + // loads should have effectively the same inputs. + for (SUnit::const_succ_iterator + SI = SUa->Succs.begin(), SE = SUa->Succs.end(); SI != SE; ++SI) { + if (SI->getSUnit() == SUb) + continue; + DEBUG(dbgs() << " Copy Succ SU(" << SI->getSUnit()->NodeNum << ")\n"); + DAG->addEdge(SI->getSUnit(), SDep(SUb, SDep::Artificial)); + } + ++ClusterLength; + } + else + ClusterLength = 1; + } +} + +/// \brief Callback from DAG postProcessing to create cluster edges for loads. +void LoadClusterMutation::apply(ScheduleDAGMI *DAG) { + // Map DAG NodeNum to store chain ID. + DenseMap StoreChainIDs; + // Map each store chain to a set of dependent loads. + SmallVector, 32> StoreChainDependents; + for (unsigned Idx = 0, End = DAG->SUnits.size(); Idx != End; ++Idx) { + SUnit *SU = &DAG->SUnits[Idx]; + if (!SU->getInstr()->mayLoad()) + continue; + unsigned ChainPredID = DAG->SUnits.size(); + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->isCtrl()) { + ChainPredID = PI->getSUnit()->NodeNum; + break; + } + } + // Check if this chain-like pred has been seen + // before. ChainPredID==MaxNodeID for loads at the top of the schedule. 
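clusterNeighboringLoads() sorts the candidate loads by (base register, offset) and then walks adjacent pairs, growing a cluster while the base register stays the same. A standalone sketch of that grouping; the records and values are made up, and the real code additionally consults the target hook and adds SDep::Cluster edges rather than printing:

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  struct LoadInfo {
    unsigned SU;       // stand-in for the scheduling unit's node number
    unsigned BaseReg;
    unsigned Offset;
  };

  int main() {
    std::vector<LoadInfo> Loads = {
        {0, 7, 16}, {1, 3, 0}, {2, 7, 0}, {3, 7, 8}, {4, 3, 4}};

    // Sort by base register, then offset, as LoadInfoLess does.
    std::sort(Loads.begin(), Loads.end(),
              [](const LoadInfo &L, const LoadInfo &R) {
                if (L.BaseReg != R.BaseReg) return L.BaseReg < R.BaseReg;
                return L.Offset < R.Offset;
              });

    // Walk adjacent pairs; the cluster grows while the base register matches.
    unsigned ClusterLength = 1;
    for (unsigned I = 0, E = Loads.size(); I + 1 < E; ++I) {
      if (Loads[I].BaseReg != Loads[I + 1].BaseReg) {
        ClusterLength = 1;
        continue;
      }
      ++ClusterLength;
      std::printf("cluster SU(%u) with SU(%u), length %u\n",
                  Loads[I].SU, Loads[I + 1].SU, ClusterLength);
    }
    return 0;
  }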
+ unsigned NumChains = StoreChainDependents.size(); + std::pair::iterator, bool> Result = + StoreChainIDs.insert(std::make_pair(ChainPredID, NumChains)); + if (Result.second) + StoreChainDependents.resize(NumChains + 1); + StoreChainDependents[Result.first->second].push_back(SU); + } + // Iterate over the store chains. + for (unsigned Idx = 0, End = StoreChainDependents.size(); Idx != End; ++Idx) + clusterNeighboringLoads(StoreChainDependents[Idx], DAG); +} + +//===----------------------------------------------------------------------===// +// MacroFusion - DAG post-processing to encourage fusion of macro ops. +//===----------------------------------------------------------------------===// + +namespace { +/// \brief Post-process the DAG to create cluster edges between instructions +/// that may be fused by the processor into a single operation. +class MacroFusion : public ScheduleDAGMutation { + const TargetInstrInfo *TII; +public: + MacroFusion(const TargetInstrInfo *tii): TII(tii) {} + + virtual void apply(ScheduleDAGMI *DAG); +}; +} // anonymous + +/// \brief Callback from DAG postProcessing to create cluster edges to encourage +/// fused operations. +void MacroFusion::apply(ScheduleDAGMI *DAG) { + // For now, assume targets can only fuse with the branch. + MachineInstr *Branch = DAG->ExitSU.getInstr(); + if (!Branch) + return; + + for (unsigned Idx = DAG->SUnits.size(); Idx > 0;) { + SUnit *SU = &DAG->SUnits[--Idx]; + if (!TII->shouldScheduleAdjacent(SU->getInstr(), Branch)) + continue; + + // Create a single weak edge from SU to ExitSU. The only effect is to cause + // bottom-up scheduling to heavily prioritize the clustered SU. There is no + // need to copy predecessor edges from ExitSU to SU, since top-down + // scheduling cannot prioritize ExitSU anyway. To defer top-down scheduling + // of SU, we could create an artificial edge from the deepest root, but it + // hasn't been needed yet. + bool Success = DAG->addEdge(&DAG->ExitSU, SDep(SU, SDep::Cluster)); + (void)Success; + assert(Success && "No DAG nodes should be reachable from ExitSU"); + + DEBUG(dbgs() << "Macro Fuse SU(" << SU->NodeNum << ")\n"); + break; + } +} + //===----------------------------------------------------------------------===// // ConvergingScheduler - Implementation of the standard MachineSchedStrategy. //===----------------------------------------------------------------------===// @@ -666,9 +916,10 @@ public: /// Represent the type of SchedCandidate found within a single queue. /// pickNodeBidirectional depends on these listed by decreasing priority. enum CandReason { - NoCand, SingleExcess, SingleCritical, ResourceReduce, ResourceDemand, - BotHeightReduce, BotPathReduce, TopDepthReduce, TopPathReduce, - SingleMax, MultiPressure, NextDefUse, NodeOrder}; + NoCand, SingleExcess, SingleCritical, Cluster, + ResourceReduce, ResourceDemand, BotHeightReduce, BotPathReduce, + TopDepthReduce, TopPathReduce, SingleMax, MultiPressure, NextDefUse, + NodeOrder}; #ifndef NDEBUG static const char *getReasonStr(ConvergingScheduler::CandReason Reason); @@ -748,23 +999,26 @@ public: unsigned CritResIdx; // Number of micro-ops left to schedule. unsigned RemainingMicroOps; - // Is the unscheduled zone resource limited. 
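LoadClusterMutation::apply() buckets loads by the node number of their control (chain) predecessor, using the bool returned by insert() to detect the first load on each chain. A standalone sketch of that grouping, with std::unordered_map standing in for DenseMap (node numbers are made up):

  #include <cstdio>
  #include <unordered_map>
  #include <vector>

  int main() {
    // (load node, chain predecessor node) pairs.
    std::vector<std::pair<unsigned, unsigned>> Loads = {
        {10, 2}, {11, 2}, {12, 5}, {13, 2}};

    std::unordered_map<unsigned, unsigned> StoreChainIDs;      // chain -> group
    std::vector<std::vector<unsigned>> StoreChainDependents;   // group -> loads

    for (const auto &L : Loads) {
      unsigned NumChains = StoreChainDependents.size();
      auto Result = StoreChainIDs.insert({L.second, NumChains});
      if (Result.second)                       // first load on this chain
        StoreChainDependents.resize(NumChains + 1);
      StoreChainDependents[Result.first->second].push_back(L.first);
    }

    for (unsigned G = 0; G != StoreChainDependents.size(); ++G)
      std::printf("chain group %u has %zu loads\n",
                  G, StoreChainDependents[G].size());
    return 0;
  }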
- bool IsResourceLimited; - - unsigned MaxRemainingCount; void reset() { CriticalPath = 0; RemainingCounts.clear(); CritResIdx = 0; RemainingMicroOps = 0; - IsResourceLimited = false; - MaxRemainingCount = 0; } SchedRemainder() { reset(); } void init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel); + + unsigned getMaxRemainingCount(const TargetSchedModel *SchedModel) const { + if (!SchedModel->hasInstrSchedModel()) + return 0; + + return std::max( + RemainingMicroOps * SchedModel->getMicroOpFactor(), + RemainingCounts[CritResIdx]); + } }; /// Each Scheduling boundary is associated with ready queues. It tracks the @@ -805,15 +1059,15 @@ public: unsigned ExpectedCount; - // Policy flag: attempt to find ILP until expected latency is covered. - bool ShouldIncreaseILP; - #ifndef NDEBUG // Remember the greatest min operand latency. unsigned MaxMinLatency; #endif void reset() { + // A new HazardRec is created for each DAG and owned by SchedBoundary. + delete HazardRec; + Available.clear(); Pending.clear(); CheckPending = false; @@ -828,7 +1082,6 @@ public: CritResIdx = 0; IsResourceLimited = false; ExpectedCount = 0; - ShouldIncreaseILP = false; #ifndef NDEBUG MaxMinLatency = 0; #endif @@ -840,7 +1093,8 @@ public: /// PendingFlag set. SchedBoundary(unsigned ID, const Twine &Name): DAG(0), SchedModel(0), Rem(0), Available(ID, Name+".A"), - Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P") { + Pending(ID << ConvergingScheduler::LogMaxQID, Name+".P"), + HazardRec(0) { reset(); } @@ -856,7 +1110,7 @@ public: unsigned getUnscheduledLatency(SUnit *SU) const { if (isTop()) return SU->getHeight(); - return SU->getDepth(); + return SU->getDepth() + SU->Latency; } unsigned getCriticalCount() const { @@ -865,7 +1119,7 @@ public: bool checkHazard(SUnit *SU); - void checkILPPolicy(); + void setLatencyPolicy(CandPolicy &Policy); void releaseNode(SUnit *SU, unsigned ReadyCycle); @@ -938,7 +1192,7 @@ protected: SchedCandidate &Candidate); #ifndef NDEBUG - void traceCandidate(const SchedCandidate &Cand, const SchedBoundary &Zone); + void traceCandidate(const SchedCandidate &Cand); #endif }; } // namespace @@ -961,6 +1215,13 @@ init(ScheduleDAGMI *DAG, const TargetSchedModel *SchedModel) { RemainingCounts[PIdx] += (Factor * PI->Cycles); } } + for (unsigned PIdx = 0, PEnd = SchedModel->getNumProcResourceKinds(); + PIdx != PEnd; ++PIdx) { + if ((int)(RemainingCounts[PIdx] - RemainingCounts[CritResIdx]) + >= (int)SchedModel->getLatencyFactor()) { + CritResIdx = PIdx; + } + } } void ConvergingScheduler::SchedBoundary:: @@ -977,6 +1238,7 @@ void ConvergingScheduler::initialize(ScheduleDAGMI *dag) { DAG = dag; SchedModel = DAG->getSchedModel(); TRI = DAG->TRI; + Rem.init(DAG, SchedModel); Top.init(DAG, SchedModel, &Rem); Bot.init(DAG, SchedModel, &Rem); @@ -998,7 +1260,7 @@ void ConvergingScheduler::releaseTopNode(SUnit *SU) { if (SU->isScheduled) return; - for (SUnit::succ_iterator I = SU->Preds.begin(), E = SU->Preds.end(); + for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { unsigned PredReadyCycle = I->getSUnit()->TopReadyCycle; unsigned MinLatency = I->getMinLatency(); @@ -1019,6 +1281,8 @@ void ConvergingScheduler::releaseBottomNode(SUnit *SU) { for (SUnit::succ_iterator I = SU->Succs.begin(), E = SU->Succs.end(); I != E; ++I) { + if (I->isWeak()) + continue; unsigned SuccReadyCycle = I->getSUnit()->BotReadyCycle; unsigned MinLatency = I->getMinLatency(); #ifndef NDEBUG @@ -1067,12 +1331,28 @@ bool ConvergingScheduler::SchedBoundary::checkHazard(SUnit *SU) { return 
false; } -/// If expected latency is covered, disable ILP policy. -void ConvergingScheduler::SchedBoundary::checkILPPolicy() { - if (ShouldIncreaseILP - && (IsResourceLimited || ExpectedLatency <= CurrCycle)) { - ShouldIncreaseILP = false; - DEBUG(dbgs() << "Disable ILP: " << Available.getName() << '\n'); +/// Compute the remaining latency to determine whether ILP should be increased. +void ConvergingScheduler::SchedBoundary::setLatencyPolicy(CandPolicy &Policy) { + // FIXME: compile time. In all, we visit four queues here one we should only + // need to visit the one that was last popped if we cache the result. + unsigned RemLatency = 0; + for (ReadyQueue::iterator I = Available.begin(), E = Available.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + for (ReadyQueue::iterator I = Pending.begin(), E = Pending.end(); + I != E; ++I) { + unsigned L = getUnscheduledLatency(*I); + if (L > RemLatency) + RemLatency = L; + } + unsigned CriticalPathLimit = Rem->CriticalPath + SchedModel->getILPWindow(); + if (RemLatency + ExpectedLatency >= CriticalPathLimit + && RemLatency > Rem->getMaxRemainingCount(SchedModel)) { + Policy.ReduceLatency = true; + DEBUG(dbgs() << "Increase ILP: " << Available.getName() << '\n'); } } @@ -1091,15 +1371,6 @@ void ConvergingScheduler::SchedBoundary::releaseNode(SUnit *SU, // Record this node as an immediate dependent of the scheduled node. NextSUs.insert(SU); - - // If CriticalPath has been computed, then check if the unscheduled nodes - // exceed the ILP window. Before registerRoots, CriticalPath==0. - if (Rem->CriticalPath && (ExpectedLatency + getUnscheduledLatency(SU) - > Rem->CriticalPath + ILPWindow)) { - ShouldIncreaseILP = true; - DEBUG(dbgs() << "Increase ILP: " << Available.getName() << " " - << ExpectedLatency << " + " << getUnscheduledLatency(SU) << '\n'); - } } /// Move the boundary of scheduled code by one cycle. @@ -1130,8 +1401,8 @@ void ConvergingScheduler::SchedBoundary::bumpCycle() { CheckPending = true; IsResourceLimited = getCriticalCount() > std::max(ExpectedLatency, CurrCycle); - DEBUG(dbgs() << " *** " << Available.getName() << " cycle " - << CurrCycle << '\n'); + DEBUG(dbgs() << " " << Available.getName() + << " Cycle: " << CurrCycle << '\n'); } /// Add the given processor resource to this scheduled zone. @@ -1147,9 +1418,6 @@ void ConvergingScheduler::SchedBoundary::countResource(unsigned PIdx, assert(Rem->RemainingCounts[PIdx] >= Count && "resource double counted"); Rem->RemainingCounts[PIdx] -= Count; - // Reset MaxRemainingCount for sanity. - Rem->MaxRemainingCount = 0; - // Check if this resource exceeds the current critical resource by a full // cycle. If so, it becomes the critical resource. if ((int)(ResourceCounts[PIdx] - ResourceCounts[CritResIdx]) @@ -1281,9 +1549,7 @@ SUnit *ConvergingScheduler::SchedBoundary::pickOnlyChoice() { /// resources. /// /// If the CriticalZone is latency limited, don't force a policy for the -/// candidates here. Instead, When releasing each candidate, releaseNode -/// compares the region's critical path to the candidate's height or depth and -/// the scheduled zone's expected latency then sets ShouldIncreaseILP. +/// candidates here. Instead, setLatencyPolicy sets ReduceLatency if needed. 
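setLatencyPolicy() replaces the old ShouldIncreaseILP flag: it takes the largest unscheduled latency over the Available and Pending queues, and requests latency reduction only when that latency would push the schedule past CriticalPath plus the ILP window and latency, not the remaining resource count from getMaxRemainingCount() above, is the limiting factor. A standalone sketch of the decision with plain numbers (all values illustrative):

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  // The zone asks for latency reduction ("increase ILP") when the remaining
  // latency dominates both the critical-path budget and the resource count.
  static bool shouldReduceLatency(const std::vector<unsigned> &UnscheduledLatencies,
                                  unsigned ExpectedLatency,
                                  unsigned CriticalPath,
                                  unsigned ILPWindow,
                                  unsigned MaxRemainingCount) {
    unsigned RemLatency = 0;
    for (unsigned L : UnscheduledLatencies)
      RemLatency = std::max(RemLatency, L);
    return RemLatency + ExpectedLatency >= CriticalPath + ILPWindow &&
           RemLatency > MaxRemainingCount;
  }

  int main() {
    std::vector<unsigned> Latencies = {4, 12, 7};   // ready + pending nodes
    bool Reduce = shouldReduceLatency(Latencies, /*ExpectedLatency=*/20,
                                      /*CriticalPath=*/18, /*ILPWindow=*/10,
                                      /*MaxRemainingCount=*/9);
    std::printf("ReduceLatency = %d\n", Reduce);    // 1: 12 + 20 >= 28 and 12 > 9
    return 0;
  }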
void ConvergingScheduler::balanceZones( ConvergingScheduler::SchedBoundary &CriticalZone, ConvergingScheduler::SchedCandidate &CriticalCand, @@ -1292,6 +1558,7 @@ void ConvergingScheduler::balanceZones( if (!CriticalZone.IsResourceLimited) return; + assert(SchedModel->hasInstrSchedModel() && "required schedmodel"); SchedRemainder *Rem = CriticalZone.Rem; @@ -1299,7 +1566,7 @@ void ConvergingScheduler::balanceZones( // remainder, try to reduce it. unsigned RemainingCritCount = Rem->RemainingCounts[CriticalZone.CritResIdx]; - if ((int)(Rem->MaxRemainingCount - RemainingCritCount) + if ((int)(Rem->getMaxRemainingCount(SchedModel) - RemainingCritCount) > (int)SchedModel->getLatencyFactor()) { CriticalCand.Policy.ReduceResIdx = CriticalZone.CritResIdx; DEBUG(dbgs() << "Balance " << CriticalZone.Available.getName() << " reduce " @@ -1325,12 +1592,9 @@ void ConvergingScheduler::checkResourceLimits( ConvergingScheduler::SchedCandidate &TopCand, ConvergingScheduler::SchedCandidate &BotCand) { - Bot.checkILPPolicy(); - Top.checkILPPolicy(); - if (Bot.ShouldIncreaseILP) - BotCand.Policy.ReduceLatency = true; - if (Top.ShouldIncreaseILP) - TopCand.Policy.ReduceLatency = true; + // Set ReduceLatency to true if needed. + Bot.setLatencyPolicy(BotCand.Policy); + Top.setLatencyPolicy(TopCand.Policy); // Handle resource-limited regions. if (Top.IsResourceLimited && Bot.IsResourceLimited @@ -1365,9 +1629,6 @@ void ConvergingScheduler::checkResourceLimits( // The critical resource is different in each zone, so request balancing. // Compute the cost of each zone. - Rem.MaxRemainingCount = std::max( - Rem.RemainingMicroOps * SchedModel->getMicroOpFactor(), - Rem.RemainingCounts[Rem.CritResIdx]); Top.ExpectedCount = std::max(Top.ExpectedLatency, Top.CurrCycle); Top.ExpectedCount = std::max( Top.getCriticalCount(), @@ -1399,7 +1660,7 @@ initResourceDelta(const ScheduleDAGMI *DAG, } /// Return true if this heuristic determines order. -static bool tryLess(unsigned TryVal, unsigned CandVal, +static bool tryLess(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, ConvergingScheduler::SchedCandidate &Cand, ConvergingScheduler::CandReason Reason) { @@ -1414,7 +1675,8 @@ static bool tryLess(unsigned TryVal, unsigned CandVal, } return false; } -static bool tryGreater(unsigned TryVal, unsigned CandVal, + +static bool tryGreater(int TryVal, int CandVal, ConvergingScheduler::SchedCandidate &TryCand, ConvergingScheduler::SchedCandidate &Cand, ConvergingScheduler::CandReason Reason) { @@ -1430,6 +1692,10 @@ static bool tryGreater(unsigned TryVal, unsigned CandVal, return false; } +static unsigned getWeakLeft(const SUnit *SU, bool isTop) { + return (isTop) ? SU->WeakPredsLeft : SU->WeakSuccsLeft; +} + /// Apply a set of heursitics to a new candidate. Heuristics are currently /// hierarchical. This may be more efficient than a graduated cost model because /// we don't need to evaluate all aspects of the model for each node in the @@ -1472,6 +1738,26 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, if (Cand.Reason == SingleCritical) Cand.Reason = MultiPressure; + // Keep clustered nodes together to encourage downstream peephole + // optimizations which may reduce resource requirements. + // + // This is a best effort to set things up for a post-RA pass. Optimizations + // like generating loads of multiple registers should ideally be done within + // the scheduler pass by combining the loads during DAG postprocessing. + const SUnit *NextClusterSU = + Zone.isTop() ? 
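tryLess()/tryGreater() are the building blocks of the hierarchical heuristics: each call either decides the comparison and records the reason, or reports a tie so the next, lower-priority heuristic runs. A standalone sketch with a simplified reason enum (names and values illustrative):

  #include <cstdio>

  enum CandReason { NoCand, RegPressure, Cluster, NodeOrder };

  struct Candidate {
    int NodeNum;
    CandReason Reason;
  };

  // Returns true if this heuristic determines the order.
  static bool tryLess(int TryVal, int CandVal, Candidate &TryCand,
                      Candidate &Cand, CandReason Reason) {
    if (TryVal < CandVal) {
      TryCand.Reason = Reason;
      return true;
    }
    if (TryVal > CandVal) {
      if (Cand.Reason > Reason)
        Cand.Reason = Reason;
      return true;
    }
    return false;   // tie: let the next heuristic decide
  }

  int main() {
    Candidate Cand = {1, NodeOrder}, TryCand = {2, NoCand};
    // First heuristic ties (3 vs 3), so the second, lower-priority one decides.
    if (!tryLess(3, 3, TryCand, Cand, RegPressure))
      tryLess(0, 2, TryCand, Cand, Cluster);
    if (TryCand.Reason != NoCand)
      std::printf("TryCand wins, reason %d\n", TryCand.Reason);   // reason 2
    return 0;
  }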
DAG->getNextClusterSucc() : DAG->getNextClusterPred(); + if (tryGreater(TryCand.SU == NextClusterSU, Cand.SU == NextClusterSU, + TryCand, Cand, Cluster)) + return; + // Currently, weak edges are for clustering, so we hard-code that reason. + // However, deferring the current TryCand will not change Cand's reason. + CandReason OrigReason = Cand.Reason; + if (tryLess(getWeakLeft(TryCand.SU, Zone.isTop()), + getWeakLeft(Cand.SU, Zone.isTop()), + TryCand, Cand, Cluster)) { + Cand.Reason = OrigReason; + return; + } // Avoid critical resource consumption and balance the schedule. TryCand.initResourceDelta(DAG, SchedModel); if (tryLess(TryCand.ResDelta.CritResources, Cand.ResDelta.CritResources, @@ -1518,15 +1804,10 @@ void ConvergingScheduler::tryCandidate(SchedCandidate &Cand, // Prefer immediate defs/users of the last scheduled instruction. This is a // nice pressure avoidance strategy that also conserves the processor's // register renaming resources and keeps the machine code readable. - if (Zone.NextSUs.count(TryCand.SU) && !Zone.NextSUs.count(Cand.SU)) { - TryCand.Reason = NextDefUse; - return; - } - if (!Zone.NextSUs.count(TryCand.SU) && Zone.NextSUs.count(Cand.SU)) { - if (Cand.Reason > NextDefUse) - Cand.Reason = NextDefUse; + if (tryGreater(Zone.NextSUs.count(TryCand.SU), Zone.NextSUs.count(Cand.SU), + TryCand, Cand, NextDefUse)) return; - } + // Fall through to original instruction order. if ((Zone.isTop() && TryCand.SU->NodeNum < Cand.SU->NodeNum) || (!Zone.isTop() && TryCand.SU->NodeNum > Cand.SU->NodeNum)) { @@ -1572,6 +1853,7 @@ const char *ConvergingScheduler::getReasonStr( case NoCand: return "NOCAND "; case SingleExcess: return "REG-EXCESS"; case SingleCritical: return "REG-CRIT "; + case Cluster: return "CLUSTER "; case SingleMax: return "REG-MAX "; case MultiPressure: return "REG-MULTI "; case ResourceReduce: return "RES-REDUCE"; @@ -1586,9 +1868,7 @@ const char *ConvergingScheduler::getReasonStr( llvm_unreachable("Unknown reason!"); } -void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, - const SchedBoundary &Zone) { - const char *Label = getReasonStr(Cand.Reason); +void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand) { PressureElement P; unsigned ResIdx = 0; unsigned Latency = 0; @@ -1623,21 +1903,21 @@ void ConvergingScheduler::traceCandidate(const SchedCandidate &Cand, Latency = Cand.SU->getDepth(); break; } - dbgs() << Label << " " << Zone.Available.getName() << " "; + dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason); if (P.isValid()) - dbgs() << TRI->getRegPressureSetName(P.PSetID) << ":" << P.UnitIncrease - << " "; + dbgs() << " " << TRI->getRegPressureSetName(P.PSetID) + << ":" << P.UnitIncrease << " "; else - dbgs() << " "; + dbgs() << " "; if (ResIdx) - dbgs() << SchedModel->getProcResource(ResIdx)->Name << " "; + dbgs() << " " << SchedModel->getProcResource(ResIdx)->Name << " "; else - dbgs() << " "; + dbgs() << " "; if (Latency) - dbgs() << Latency << " cycles "; + dbgs() << " " << Latency << " cycles "; else - dbgs() << " "; - Cand.SU->dump(DAG); + dbgs() << " "; + dbgs() << '\n'; } #endif @@ -1666,15 +1946,14 @@ void ConvergingScheduler::pickNodeFromQueue(SchedBoundary &Zone, if (TryCand.ResDelta == SchedResourceDelta()) TryCand.initResourceDelta(DAG, SchedModel); Cand.setBest(TryCand); - DEBUG(traceCandidate(Cand, Zone)); + DEBUG(traceCandidate(Cand)); } - TryCand.SU = *I; } } static void tracePick(const ConvergingScheduler::SchedCandidate &Cand, bool IsTop) { - DEBUG(dbgs() << "Pick " << (IsTop ? 
"top" : "bot") + DEBUG(dbgs() << "Pick " << (IsTop ? "Top" : "Bot") << " SU(" << Cand.SU->NodeNum << ") " << ConvergingScheduler::getReasonStr(Cand.Reason) << '\n'); } @@ -1786,10 +2065,7 @@ SUnit *ConvergingScheduler::pickNode(bool &IsTopNode) { if (SU->isBottomReady()) Bot.removeReady(SU); - DEBUG(dbgs() << "*** " << (IsTopNode ? "Top" : "Bottom") - << " Scheduling Instruction in cycle " - << (IsTopNode ? Top.CurrCycle : Bot.CurrCycle) << '\n'; - SU->dump(DAG)); + DEBUG(dbgs() << "Scheduling " << *SU->getInstr()); return SU; } @@ -1812,7 +2088,13 @@ void ConvergingScheduler::schedNode(SUnit *SU, bool IsTopNode) { static ScheduleDAGInstrs *createConvergingSched(MachineSchedContext *C) { assert((!ForceTopDown || !ForceBottomUp) && "-misched-topdown incompatible with -misched-bottomup"); - return new ScheduleDAGMI(C, new ConvergingScheduler()); + ScheduleDAGMI *DAG = new ScheduleDAGMI(C, new ConvergingScheduler()); + // Register DAG post-processors. + if (EnableLoadCluster) + DAG->addMutation(new LoadClusterMutation(DAG->TII, DAG->TRI)); + if (EnableMacroFusion) + DAG->addMutation(new MacroFusion(DAG->TII)); + return DAG; } static MachineSchedRegistry ConvergingSchedRegistry("converge", "Standard converging scheduler.", @@ -1825,58 +2107,97 @@ ConvergingSchedRegistry("converge", "Standard converging scheduler.", namespace { /// \brief Order nodes by the ILP metric. struct ILPOrder { - ScheduleDAGILP *ILP; + const SchedDFSResult *DFSResult; + const BitVector *ScheduledTrees; bool MaximizeILP; - ILPOrder(ScheduleDAGILP *ilp, bool MaxILP): ILP(ilp), MaximizeILP(MaxILP) {} + ILPOrder(bool MaxILP): DFSResult(0), ScheduledTrees(0), MaximizeILP(MaxILP) {} /// \brief Apply a less-than relation on node priority. + /// + /// (Return true if A comes after B in the Q.) bool operator()(const SUnit *A, const SUnit *B) const { - // Return true if A comes after B in the Q. + unsigned SchedTreeA = DFSResult->getSubtreeID(A); + unsigned SchedTreeB = DFSResult->getSubtreeID(B); + if (SchedTreeA != SchedTreeB) { + // Unscheduled trees have lower priority. + if (ScheduledTrees->test(SchedTreeA) != ScheduledTrees->test(SchedTreeB)) + return ScheduledTrees->test(SchedTreeB); + + // Trees with shallower connections have have lower priority. + if (DFSResult->getSubtreeLevel(SchedTreeA) + != DFSResult->getSubtreeLevel(SchedTreeB)) { + return DFSResult->getSubtreeLevel(SchedTreeA) + < DFSResult->getSubtreeLevel(SchedTreeB); + } + } if (MaximizeILP) - return ILP->getILP(A) < ILP->getILP(B); + return DFSResult->getILP(A) < DFSResult->getILP(B); else - return ILP->getILP(A) > ILP->getILP(B); + return DFSResult->getILP(A) > DFSResult->getILP(B); } }; /// \brief Schedule based on the ILP metric. class ILPScheduler : public MachineSchedStrategy { - ScheduleDAGILP ILP; + /// In case all subtrees are eventually connected to a common root through + /// data dependence (e.g. reduction), place an upper limit on their size. + /// + /// FIXME: A subtree limit is generally good, but in the situation commented + /// above, where multiple similar subtrees feed a common root, we should + /// only split at a point where the resulting subtrees will be balanced. + /// (a motivating test case must be found). 
+ static const unsigned SubtreeLimit = 16; + + ScheduleDAGMI *DAG; ILPOrder Cmp; std::vector ReadyQ; public: - ILPScheduler(bool MaximizeILP) - : ILP(/*BottomUp=*/true), Cmp(&ILP, MaximizeILP) {} + ILPScheduler(bool MaximizeILP): DAG(0), Cmp(MaximizeILP) {} - virtual void initialize(ScheduleDAGMI *DAG) { + virtual void initialize(ScheduleDAGMI *dag) { + DAG = dag; + DAG->computeDFSResult(); + Cmp.DFSResult = DAG->getDFSResult(); + Cmp.ScheduledTrees = &DAG->getScheduledTrees(); ReadyQ.clear(); - ILP.resize(DAG->SUnits.size()); } virtual void registerRoots() { - for (std::vector::const_iterator - I = ReadyQ.begin(), E = ReadyQ.end(); I != E; ++I) { - ILP.computeILP(*I); - } + // Restore the heap in ReadyQ with the updated DFS results. + std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); } /// Implement MachineSchedStrategy interface. /// ----------------------------------------- + /// Callback to select the highest priority node from the ready Q. virtual SUnit *pickNode(bool &IsTopNode) { if (ReadyQ.empty()) return NULL; - pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + std::pop_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); SUnit *SU = ReadyQ.back(); ReadyQ.pop_back(); IsTopNode = false; - DEBUG(dbgs() << "*** Scheduling " << *SU->getInstr() - << " ILP: " << ILP.getILP(SU) << '\n'); + DEBUG(dbgs() << "*** Scheduling " << "SU(" << SU->NodeNum << "): " + << *SU->getInstr() + << " ILP: " << DAG->getDFSResult()->getILP(SU) + << " Tree: " << DAG->getDFSResult()->getSubtreeID(SU) << " @" + << DAG->getDFSResult()->getSubtreeLevel( + DAG->getDFSResult()->getSubtreeID(SU)) << '\n'); return SU; } - virtual void schedNode(SUnit *, bool) {} + /// \brief Scheduler callback to notify that a new subtree is scheduled. + virtual void scheduleTree(unsigned SubtreeID) { + std::make_heap(ReadyQ.begin(), ReadyQ.end(), Cmp); + } + + /// Callback after a node is scheduled. Mark a newly scheduled tree, notify + /// DFSResults, and resort the priority Q. + virtual void schedNode(SUnit *SU, bool IsTopNode) { + assert(!IsTopNode && "SchedDFSResult needs bottom-up"); + } virtual void releaseTopNode(SUnit *) { /*only called for top roots*/ } @@ -1986,3 +2307,90 @@ static MachineSchedRegistry ShufflerRegistry( "shuffle", "Shuffle machine instructions alternating directions", createInstructionShuffler); #endif // !NDEBUG + +//===----------------------------------------------------------------------===// +// GraphWriter support for ScheduleDAGMI. +//===----------------------------------------------------------------------===// + +#ifndef NDEBUG +namespace llvm { + +template<> struct GraphTraits< + ScheduleDAGMI*> : public GraphTraits {}; + +template<> +struct DOTGraphTraits : public DefaultDOTGraphTraits { + + DOTGraphTraits (bool isSimple=false) : DefaultDOTGraphTraits(isSimple) {} + + static std::string getGraphName(const ScheduleDAG *G) { + return G->MF.getName(); + } + + static bool renderGraphFromBottomUp() { + return true; + } + + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + + static bool hasNodeAddressLabel(const SUnit *Node, + const ScheduleDAG *Graph) { + return false; + } + + /// If you want to override the dot attributes printed for a particular + /// edge, override this method. 
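The reworked ILPScheduler keeps its ready queue as a std::vector maintained with the standard heap algorithms, and ILPOrder prefers nodes from already-started subtrees, then deeper subtree connection levels, then the ILP metric itself. A standalone sketch that collapses the per-subtree lookups into per-node fields (fields and values are illustrative; the real comparator queries SchedDFSResult and re-heaps when a subtree is scheduled):

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  struct Node {
    unsigned Id;
    bool SubtreeScheduled;   // has this node's subtree been started?
    unsigned SubtreeLevel;
    unsigned ILP;
  };

  // Returns true if A should come after B in the queue (i.e. B is preferred).
  static bool ilpLess(const Node &A, const Node &B) {
    if (A.SubtreeScheduled != B.SubtreeScheduled)
      return B.SubtreeScheduled;         // unscheduled trees have lower priority
    if (A.SubtreeLevel != B.SubtreeLevel)
      return A.SubtreeLevel < B.SubtreeLevel;
    return A.ILP < B.ILP;                // maximize ILP
  }

  int main() {
    std::vector<Node> ReadyQ = {{0, false, 1, 5}, {1, true, 0, 2}, {2, false, 3, 9}};
    std::make_heap(ReadyQ.begin(), ReadyQ.end(), ilpLess);

    while (!ReadyQ.empty()) {
      std::pop_heap(ReadyQ.begin(), ReadyQ.end(), ilpLess);
      Node SU = ReadyQ.back();
      ReadyQ.pop_back();
      std::printf("pick node %u\n", SU.Id);   // 1, then 2, then 0
    }
    return 0;
  }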
+ static std::string getEdgeAttributes(const SUnit *Node, + SUnitIterator EI, + const ScheduleDAG *Graph) { + if (EI.isArtificialDep()) + return "color=cyan,style=dashed"; + if (EI.isCtrlDep()) + return "color=blue,style=dashed"; + return ""; + } + + static std::string getNodeLabel(const SUnit *SU, const ScheduleDAG *G) { + std::string Str; + raw_string_ostream SS(Str); + SS << "SU(" << SU->NodeNum << ')'; + return SS.str(); + } + static std::string getNodeDescription(const SUnit *SU, const ScheduleDAG *G) { + return G->getGraphNodeLabel(SU); + } + + static std::string getNodeAttributes(const SUnit *N, + const ScheduleDAG *Graph) { + std::string Str("shape=Mrecord"); + const SchedDFSResult *DFS = + static_cast(Graph)->getDFSResult(); + if (DFS) { + Str += ",style=filled,fillcolor=\"#"; + Str += DOT::getColorString(DFS->getSubtreeID(N)); + Str += '"'; + } + return Str; + } +}; +} // namespace llvm +#endif // NDEBUG + +/// viewGraph - Pop up a ghostview window with the reachable parts of the DAG +/// rendered using 'dot'. +/// +void ScheduleDAGMI::viewGraph(const Twine &Name, const Twine &Title) { +#ifndef NDEBUG + ViewGraph(this, Name, false, Title); +#else + errs() << "ScheduleDAGMI::viewGraph is only available in debug builds on " + << "systems with Graphviz or gv!\n"; +#endif // NDEBUG +} + +/// Out-of-line implementation with no arguments is handy for gdb. +void ScheduleDAGMI::viewGraph() { + viewGraph(getDAGName(), "Scheduling-Units Graph for " + getDAGName()); +} diff --git a/lib/CodeGen/MachineSink.cpp b/lib/CodeGen/MachineSink.cpp index b117f8c3a206..4dafbe5a3e3a 100644 --- a/lib/CodeGen/MachineSink.cpp +++ b/lib/CodeGen/MachineSink.cpp @@ -18,18 +18,18 @@ #define DEBUG_TYPE "machine-sink" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; static cl::opt diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index 9686b0413293..49d8c4e9470d 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -8,20 +8,21 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "machine-trace-metrics" -#include "MachineTraceMetrics.h" +#include "llvm/CodeGen/MachineTraceMetrics.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/SparseSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/MC/MCSubtargetInfo.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" #include 
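The DOTGraphTraits specialization renders control (chain) dependencies as dashed blue edges and artificial/cluster edges as dashed cyan, with Mrecord-shaped nodes filled per DFS subtree. A standalone sketch that prints a comparable .dot graph by hand (the real pass goes through llvm::ViewGraph); piping the output to Graphviz, for example dot -Tpng, should render it:

  #include <cstdio>

  int main() {
    std::printf("digraph misched {\n");
    std::printf("  SU0 [shape=Mrecord,label=\"SU(0)\"];\n");
    std::printf("  SU1 [shape=Mrecord,label=\"SU(1)\"];\n");
    std::printf("  SU2 [shape=Mrecord,label=\"SU(2)\"];\n");
    std::printf("  SU0 -> SU1;\n");                               // data dep
    std::printf("  SU0 -> SU2 [color=blue,style=dashed];\n");     // ctrl dep
    std::printf("  SU1 -> SU2 [color=cyan,style=dashed];\n");     // cluster/artificial
    std::printf("}\n");
    return 0;
  }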
"llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/SparseSet.h" using namespace llvm; @@ -57,6 +58,8 @@ bool MachineTraceMetrics::runOnMachineFunction(MachineFunction &Func) { MF->getTarget().getSubtarget(); SchedModel.init(*ST.getSchedModel(), &ST, TII); BlockInfo.resize(MF->getNumBlockIDs()); + ProcResourceCycles.resize(MF->getNumBlockIDs() * + SchedModel.getNumProcResourceKinds()); return false; } @@ -85,9 +88,13 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { return FBI; // Compute resource usage in the block. - // FIXME: Compute per-functional unit counts. FBI->HasCalls = false; unsigned InstrCount = 0; + + // Add up per-processor resource cycles as well. + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + SmallVector PRCycles(PRKinds); + for (MachineBasicBlock::const_iterator I = MBB->begin(), E = MBB->end(); I != E; ++I) { const MachineInstr *MI = I; @@ -96,11 +103,43 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { ++InstrCount; if (MI->isCall()) FBI->HasCalls = true; + + // Count processor resources used. + if (!SchedModel.hasInstrSchedModel()) + continue; + const MCSchedClassDesc *SC = SchedModel.resolveSchedClass(MI); + if (!SC->isValid()) + continue; + + for (TargetSchedModel::ProcResIter + PI = SchedModel.getWriteProcResBegin(SC), + PE = SchedModel.getWriteProcResEnd(SC); PI != PE; ++PI) { + assert(PI->ProcResourceIdx < PRKinds && "Bad processor resource kind"); + PRCycles[PI->ProcResourceIdx] += PI->Cycles; + } } FBI->InstrCount = InstrCount; + + // Scale the resource cycles so they are comparable. + unsigned PROffset = MBB->getNumber() * PRKinds; + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceCycles[PROffset + K] = + PRCycles[K] * SchedModel.getResourceFactor(K); + return FBI; } +ArrayRef +MachineTraceMetrics::getProcResourceCycles(unsigned MBBNum) const { + assert(BlockInfo[MBBNum].hasResources() && + "getResources() must be called before getProcResourceCycles()"); + unsigned PRKinds = SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceCycles.size()); + return ArrayRef(ProcResourceCycles.data() + MBBNum * PRKinds, + PRKinds); +} + + //===----------------------------------------------------------------------===// // Ensemble utility functions //===----------------------------------------------------------------------===// @@ -108,6 +147,9 @@ MachineTraceMetrics::getResources(const MachineBasicBlock *MBB) { MachineTraceMetrics::Ensemble::Ensemble(MachineTraceMetrics *ct) : MTM(*ct) { BlockInfo.resize(MTM.BlockInfo.size()); + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + ProcResourceDepths.resize(MTM.BlockInfo.size() * PRKinds); + ProcResourceHeights.resize(MTM.BlockInfo.size() * PRKinds); } // Virtual destructor serves as an anchor. @@ -123,21 +165,32 @@ MachineTraceMetrics::Ensemble::getLoopFor(const MachineBasicBlock *MBB) const { void MachineTraceMetrics::Ensemble:: computeDepthResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources from trace above. The top block is simple. 
if (!TBI->Pred) { TBI->InstrDepth = 0; TBI->Head = MBB->getNumber(); + std::fill(ProcResourceDepths.begin() + PROffset, + ProcResourceDepths.begin() + PROffset + PRKinds, 0); return; } // Compute from the block above. A post-order traversal ensures the // predecessor is always computed first. - TraceBlockInfo *PredTBI = &BlockInfo[TBI->Pred->getNumber()]; + unsigned PredNum = TBI->Pred->getNumber(); + TraceBlockInfo *PredTBI = &BlockInfo[PredNum]; assert(PredTBI->hasValidDepth() && "Trace above has not been computed yet"); const FixedBlockInfo *PredFBI = MTM.getResources(TBI->Pred); TBI->InstrDepth = PredTBI->InstrDepth + PredFBI->InstrCount; TBI->Head = PredTBI->Head; + + // Compute per-resource depths. + ArrayRef PredPRDepths = getProcResourceDepths(PredNum); + ArrayRef PredPRCycles = MTM.getProcResourceCycles(PredNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceDepths[PROffset + K] = PredPRDepths[K] + PredPRCycles[K]; } // Update resource-related information in the TraceBlockInfo for MBB. @@ -145,22 +198,33 @@ computeDepthResources(const MachineBasicBlock *MBB) { void MachineTraceMetrics::Ensemble:: computeHeightResources(const MachineBasicBlock *MBB) { TraceBlockInfo *TBI = &BlockInfo[MBB->getNumber()]; + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + unsigned PROffset = MBB->getNumber() * PRKinds; // Compute resources for the current block. TBI->InstrHeight = MTM.getResources(MBB)->InstrCount; + ArrayRef PRCycles = MTM.getProcResourceCycles(MBB->getNumber()); // The trace tail is done. if (!TBI->Succ) { TBI->Tail = MBB->getNumber(); + std::copy(PRCycles.begin(), PRCycles.end(), + ProcResourceHeights.begin() + PROffset); return; } // Compute from the block below. A post-order traversal ensures the // predecessor is always computed first. - TraceBlockInfo *SuccTBI = &BlockInfo[TBI->Succ->getNumber()]; + unsigned SuccNum = TBI->Succ->getNumber(); + TraceBlockInfo *SuccTBI = &BlockInfo[SuccNum]; assert(SuccTBI->hasValidHeight() && "Trace below has not been computed yet"); TBI->InstrHeight += SuccTBI->InstrHeight; TBI->Tail = SuccTBI->Tail; + + // Compute per-resource heights. + ArrayRef SuccPRHeights = getProcResourceHeights(SuccNum); + for (unsigned K = 0; K != PRKinds; ++K) + ProcResourceHeights[PROffset + K] = SuccPRHeights[K] + PRCycles[K]; } // Check if depth resources for MBB are valid and return the TBI. @@ -181,6 +245,35 @@ getHeightResources(const MachineBasicBlock *MBB) const { return TBI->hasValidHeight() ? TBI : 0; } +/// Get an array of processor resource depths for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by all blocks preceding MBB in its trace. It does not include instructions +/// in MBB. +/// +/// Compare TraceBlockInfo::InstrDepth. +ArrayRef +MachineTraceMetrics::Ensemble:: +getProcResourceDepths(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceDepths.size()); + return ArrayRef(ProcResourceDepths.data() + MBBNum * PRKinds, + PRKinds); +} + +/// Get an array of processor resource heights for MBB. Indexed by processor +/// resource kind, this array contains the scaled processor resources consumed +/// by this block and all blocks following it in its trace. +/// +/// Compare TraceBlockInfo::InstrHeight. 
+ArrayRef +MachineTraceMetrics::Ensemble:: +getProcResourceHeights(unsigned MBBNum) const { + unsigned PRKinds = MTM.SchedModel.getNumProcResourceKinds(); + assert((MBBNum+1) * PRKinds <= ProcResourceHeights.size()); + return ArrayRef(ProcResourceHeights.data() + MBBNum * PRKinds, + PRKinds); +} + //===----------------------------------------------------------------------===// // Trace Selection Strategies //===----------------------------------------------------------------------===// @@ -677,7 +770,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); // Ignore dependencies outside the current trace. const TraceBlockInfo &DefTBI = BlockInfo[DefMI->getParent()->getNumber()]; - if (!DefTBI.isEarlierInSameTrace(TBI)) + if (!DefTBI.isUsefulDominator(TBI)) continue; unsigned Len = LIR.Height + Cycles[DefMI].Depth; MaxLen = std::max(MaxLen, Len); @@ -713,11 +806,24 @@ computeInstrDepths(const MachineBasicBlock *MBB) { SmallVector Deps; while (!Stack.empty()) { MBB = Stack.pop_back_val(); - DEBUG(dbgs() << "Depths for BB#" << MBB->getNumber() << ":\n"); + DEBUG(dbgs() << "\nDepths for BB#" << MBB->getNumber() << ":\n"); TraceBlockInfo &TBI = BlockInfo[MBB->getNumber()]; TBI.HasValidInstrDepths = true; TBI.CriticalPath = 0; + // Print out resource depths here as well. + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrDepth); + ArrayRef PRDepths = getProcResourceDepths(MBB->getNumber()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + if (PRDepths[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRDepths[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRDepths[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Also compute the critical path length through MBB when possible. if (TBI.HasValidInstrHeights) TBI.CriticalPath = computeCrossBlockCriticalPath(TBI); @@ -740,7 +846,7 @@ computeInstrDepths(const MachineBasicBlock *MBB) { const TraceBlockInfo&DepTBI = BlockInfo[Dep.DefMI->getParent()->getNumber()]; // Ignore dependencies from outside the current trace. - if (!DepTBI.isEarlierInSameTrace(TBI)) + if (!DepTBI.isUsefulDominator(TBI)) continue; assert(DepTBI.HasValidInstrDepths && "Inconsistent dependency"); unsigned DepCycle = Cycles.lookup(Dep.DefMI).Depth; @@ -928,6 +1034,18 @@ computeInstrHeights(const MachineBasicBlock *MBB) { TBI.HasValidInstrHeights = true; TBI.CriticalPath = 0; + DEBUG({ + dbgs() << format("%7u Instructions\n", TBI.InstrHeight); + ArrayRef PRHeights = getProcResourceHeights(MBB->getNumber()); + for (unsigned K = 0; K != PRHeights.size(); ++K) + if (PRHeights[K]) { + unsigned Factor = MTM.SchedModel.getResourceFactor(K); + dbgs() << format("%6uc @ ", MTM.getCycles(PRHeights[K])) + << MTM.SchedModel.getProcResource(K)->Name << " (" + << PRHeights[K]/Factor << " ops x" << Factor << ")\n"; + } + }); + // Get dependencies from PHIs in the trace successor. const MachineBasicBlock *Succ = TBI.Succ; // If MBB is the last block in the trace, and it has a back-edge to the @@ -1058,27 +1176,52 @@ MachineTraceMetrics::Trace::getPHIDepth(const MachineInstr *PHI) const { } unsigned MachineTraceMetrics::Trace::getResourceDepth(bool Bottom) const { - // For now, we compute the resource depth from instruction count / issue - // width. Eventually, we should compute resource depth per functional unit - // and return the max. + // Find the limiting processor resource. + // Numbers have been pre-scaled to be comparable. 
+ unsigned PRMax = 0; + ArrayRef PRDepths = TE.getProcResourceDepths(getBlockNum()); + if (Bottom) { + ArrayRef PRCycles = TE.MTM.getProcResourceCycles(getBlockNum()); + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K] + PRCycles[K]); + } else { + for (unsigned K = 0; K != PRDepths.size(); ++K) + PRMax = std::max(PRMax, PRDepths[K]); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + unsigned Instrs = TBI.InstrDepth; if (Bottom) Instrs += TE.MTM.BlockInfo[getBlockNum()].InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. - return Instrs; + return std::max(Instrs, PRMax); } unsigned MachineTraceMetrics::Trace:: getResourceLength(ArrayRef Extrablocks) const { + // Add up resources above and below the center block. + ArrayRef PRDepths = TE.getProcResourceDepths(getBlockNum()); + ArrayRef PRHeights = TE.getProcResourceHeights(getBlockNum()); + unsigned PRMax = 0; + for (unsigned K = 0; K != PRDepths.size(); ++K) { + unsigned PRCycles = PRDepths[K] + PRHeights[K]; + for (unsigned I = 0; I != Extrablocks.size(); ++I) + PRCycles += TE.MTM.getProcResourceCycles(Extrablocks[I]->getNumber())[K]; + PRMax = std::max(PRMax, PRCycles); + } + // Convert to cycle count. + PRMax = TE.MTM.getCycles(PRMax); + unsigned Instrs = TBI.InstrDepth + TBI.InstrHeight; for (unsigned i = 0, e = Extrablocks.size(); i != e; ++i) Instrs += TE.MTM.getResources(Extrablocks[i])->InstrCount; if (unsigned IW = TE.MTM.SchedModel.getIssueWidth()) Instrs /= IW; // Assume issue width 1 without a schedule model. - return Instrs; + return std::max(Instrs, PRMax); } void MachineTraceMetrics::Ensemble::print(raw_ostream &OS) const { diff --git a/lib/CodeGen/MachineTraceMetrics.h b/lib/CodeGen/MachineTraceMetrics.h deleted file mode 100644 index 460730b04059..000000000000 --- a/lib/CodeGen/MachineTraceMetrics.h +++ /dev/null @@ -1,350 +0,0 @@ -//===- lib/CodeGen/MachineTraceMetrics.h - Super-scalar metrics -*- C++ -*-===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file defines the interface for the MachineTraceMetrics analysis pass -// that estimates CPU resource usage and critical data dependency paths through -// preferred traces. This is useful for super-scalar CPUs where execution speed -// can be limited both by data dependencies and by limited execution resources. -// -// Out-of-order CPUs will often be executing instructions from multiple basic -// blocks at the same time. This makes it difficult to estimate the resource -// usage accurately in a single basic block. Resources can be estimated better -// by looking at a trace through the current basic block. -// -// For every block, the MachineTraceMetrics pass will pick a preferred trace -// that passes through the block. The trace is chosen based on loop structure, -// branch probabilities, and resource usage. The intention is to pick likely -// traces that would be the most affected by code transformations. -// -// It is expensive to compute a full arbitrary trace for every block, so to -// save some computations, traces are chosen to be convergent. This means that -// if the traces through basic blocks A and B ever cross when moving away from -// A and B, they never diverge again. 
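With per-resource cycles available, getResourceDepth()/getResourceLength() take the maximum of the instruction-count bound (instructions divided by issue width) and the cycle count of the most heavily used processor resource, instead of the instruction count alone. A standalone sketch of that max (the real code also rescales via getCycles() and folds in the Extrablocks; values are made up):

  #include <algorithm>
  #include <cstdio>
  #include <vector>

  // The trace is limited either by how fast instructions can issue or by its
  // busiest processor resource, whichever needs more cycles.
  static unsigned resourceLength(unsigned InstrCount, unsigned IssueWidth,
                                 const std::vector<unsigned> &PerResourceCycles) {
    unsigned PRMax = 0;
    for (unsigned Cycles : PerResourceCycles)
      PRMax = std::max(PRMax, Cycles);

    unsigned Instrs = InstrCount;
    if (IssueWidth)
      Instrs /= IssueWidth;   // assume issue width 1 without a schedule model
    return std::max(Instrs, PRMax);
  }

  int main() {
    // 24 instructions on a 2-wide machine would take 12 cycles, but one
    // resource kind needs 15 cycles, so the resource length is 15.
    std::vector<unsigned> PerResourceCycles = {9, 15, 4};
    std::printf("resource length = %u cycles\n",
                resourceLength(24, 2, PerResourceCycles));
    return 0;
  }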
This applies in both directions - If the -// traces meet above A and B, they won't diverge when going further back. -// -// Traces tend to align with loops. The trace through a block in an inner loop -// will begin at the loop entry block and end at a back edge. If there are -// nested loops, the trace may begin and end at those instead. -// -// For each trace, we compute the critical path length, which is the number of -// cycles required to execute the trace when execution is limited by data -// dependencies only. We also compute the resource height, which is the number -// of cycles required to execute all instructions in the trace when ignoring -// data dependencies. -// -// Every instruction in the current block has a slack - the number of cycles -// execution of the instruction can be delayed without extending the critical -// path. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_MACHINE_TRACE_METRICS_H -#define LLVM_CODEGEN_MACHINE_TRACE_METRICS_H - -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/TargetSchedule.h" - -namespace llvm { - -class InstrItineraryData; -class MachineBasicBlock; -class MachineInstr; -class MachineLoop; -class MachineLoopInfo; -class MachineRegisterInfo; -class TargetInstrInfo; -class TargetRegisterInfo; -class raw_ostream; - -class MachineTraceMetrics : public MachineFunctionPass { - const MachineFunction *MF; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - const MachineRegisterInfo *MRI; - const MachineLoopInfo *Loops; - TargetSchedModel SchedModel; - -public: - class Ensemble; - class Trace; - static char ID; - MachineTraceMetrics(); - void getAnalysisUsage(AnalysisUsage&) const; - bool runOnMachineFunction(MachineFunction&); - void releaseMemory(); - void verifyAnalysis() const; - - friend class Ensemble; - friend class Trace; - - /// Per-basic block information that doesn't depend on the trace through the - /// block. - struct FixedBlockInfo { - /// The number of non-trivial instructions in the block. - /// Doesn't count PHI and COPY instructions that are likely to be removed. - unsigned InstrCount; - - /// True when the block contains calls. - bool HasCalls; - - FixedBlockInfo() : InstrCount(~0u), HasCalls(false) {} - - /// Returns true when resource information for this block has been computed. - bool hasResources() const { return InstrCount != ~0u; } - - /// Invalidate resource information. - void invalidate() { InstrCount = ~0u; } - }; - - /// Get the fixed resource information about MBB. Compute it on demand. - const FixedBlockInfo *getResources(const MachineBasicBlock*); - - /// A virtual register or regunit required by a basic block or its trace - /// successors. - struct LiveInReg { - /// The virtual register required, or a register unit. - unsigned Reg; - - /// For virtual registers: Minimum height of the defining instruction. - /// For regunits: Height of the highest user in the trace. - unsigned Height; - - LiveInReg(unsigned Reg, unsigned Height = 0) : Reg(Reg), Height(Height) {} - }; - - /// Per-basic block information that relates to a specific trace through the - /// block. Convergent traces means that only one of these is required per - /// block in a trace ensemble. - struct TraceBlockInfo { - /// Trace predecessor, or NULL for the first block in the trace. - /// Valid when hasValidDepth(). 
- const MachineBasicBlock *Pred; - - /// Trace successor, or NULL for the last block in the trace. - /// Valid when hasValidHeight(). - const MachineBasicBlock *Succ; - - /// The block number of the head of the trace. (When hasValidDepth()). - unsigned Head; - - /// The block number of the tail of the trace. (When hasValidHeight()). - unsigned Tail; - - /// Accumulated number of instructions in the trace above this block. - /// Does not include instructions in this block. - unsigned InstrDepth; - - /// Accumulated number of instructions in the trace below this block. - /// Includes instructions in this block. - unsigned InstrHeight; - - TraceBlockInfo() : - Pred(0), Succ(0), - InstrDepth(~0u), InstrHeight(~0u), - HasValidInstrDepths(false), HasValidInstrHeights(false) {} - - /// Returns true if the depth resources have been computed from the trace - /// above this block. - bool hasValidDepth() const { return InstrDepth != ~0u; } - - /// Returns true if the height resources have been computed from the trace - /// below this block. - bool hasValidHeight() const { return InstrHeight != ~0u; } - - /// Invalidate depth resources when some block above this one has changed. - void invalidateDepth() { InstrDepth = ~0u; HasValidInstrDepths = false; } - - /// Invalidate height resources when a block below this one has changed. - void invalidateHeight() { InstrHeight = ~0u; HasValidInstrHeights = false; } - - /// Determine if this block belongs to the same trace as TBI and comes - /// before it in the trace. - /// Also returns true when TBI == this. - bool isEarlierInSameTrace(const TraceBlockInfo &TBI) const { - return hasValidDepth() && TBI.hasValidDepth() && - Head == TBI.Head && InstrDepth <= TBI.InstrDepth; - } - - // Data-dependency-related information. Per-instruction depth and height - // are computed from data dependencies in the current trace, using - // itinerary data. - - /// Instruction depths have been computed. This implies hasValidDepth(). - bool HasValidInstrDepths; - - /// Instruction heights have been computed. This implies hasValidHeight(). - bool HasValidInstrHeights; - - /// Critical path length. This is the number of cycles in the longest data - /// dependency chain through the trace. This is only valid when both - /// HasValidInstrDepths and HasValidInstrHeights are set. - unsigned CriticalPath; - - /// Live-in registers. These registers are defined above the current block - /// and used by this block or a block below it. - /// This does not include PHI uses in the current block, but it does - /// include PHI uses in deeper blocks. - SmallVector LiveIns; - - void print(raw_ostream&) const; - }; - - /// InstrCycles represents the cycle height and depth of an instruction in a - /// trace. - struct InstrCycles { - /// Earliest issue cycle as determined by data dependencies and instruction - /// latencies from the beginning of the trace. Data dependencies from - /// before the trace are not included. - unsigned Depth; - - /// Minimum number of cycles from this instruction is issued to the of the - /// trace, as determined by data dependencies and instruction latencies. - unsigned Height; - }; - - /// A trace represents a plausible sequence of executed basic blocks that - /// passes through the current basic block one. The Trace class serves as a - /// handle to internal cached data structures. 
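The Trace/Ensemble API quoted in this removed header (getEnsemble(), getTrace(), getCriticalPath(), getResourceLength(), getInstrSlack()) is easiest to read from the client side. A minimal sketch, not part of the patch, using only that interface; the pass, its helper, and the speculation heuristic are hypothetical:

// Hypothetical client; only the MachineTraceMetrics API is taken from the
// header quoted here, everything else is illustrative.
bool MySpeculationPass::shouldSpeculate(MachineTraceMetrics &MTM,
                                        MachineBasicBlock *MBB,
                                        const MachineInstr *MI) {
  // One trace per block, chosen to minimize instruction count.
  MachineTraceMetrics::Ensemble *MinInstr =
      MTM.getEnsemble(MachineTraceMetrics::TS_MinInstrCount);
  MachineTraceMetrics::Trace Tr = MinInstr->getTrace(MBB);

  // Latency limit vs. execution-resource limit for this trace.
  unsigned CritPath = Tr.getCriticalPath();
  unsigned ResLength = Tr.getResourceLength();

  // MI must be in the trace center block; an instruction with slack can be
  // delayed without lengthening the critical path.
  unsigned Slack = Tr.getInstrSlack(MI);

  return Slack > 0 && ResLength <= CritPath;  // illustrative heuristic only
}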
- class Trace { - Ensemble &TE; - TraceBlockInfo &TBI; - - unsigned getBlockNum() const { return &TBI - &TE.BlockInfo[0]; } - - public: - explicit Trace(Ensemble &te, TraceBlockInfo &tbi) : TE(te), TBI(tbi) {} - void print(raw_ostream&) const; - - /// Compute the total number of instructions in the trace. - unsigned getInstrCount() const { - return TBI.InstrDepth + TBI.InstrHeight; - } - - /// Return the resource depth of the top/bottom of the trace center block. - /// This is the number of cycles required to execute all instructions from - /// the trace head to the trace center block. The resource depth only - /// considers execution resources, it ignores data dependencies. - /// When Bottom is set, instructions in the trace center block are included. - unsigned getResourceDepth(bool Bottom) const; - - /// Return the resource length of the trace. This is the number of cycles - /// required to execute the instructions in the trace if they were all - /// independent, exposing the maximum instruction-level parallelism. - /// - /// Any blocks in Extrablocks are included as if they were part of the - /// trace. - unsigned getResourceLength(ArrayRef Extrablocks = - ArrayRef()) const; - - /// Return the length of the (data dependency) critical path through the - /// trace. - unsigned getCriticalPath() const { return TBI.CriticalPath; } - - /// Return the depth and height of MI. The depth is only valid for - /// instructions in or above the trace center block. The height is only - /// valid for instructions in or below the trace center block. - InstrCycles getInstrCycles(const MachineInstr *MI) const { - return TE.Cycles.lookup(MI); - } - - /// Return the slack of MI. This is the number of cycles MI can be delayed - /// before the critical path becomes longer. - /// MI must be an instruction in the trace center block. - unsigned getInstrSlack(const MachineInstr *MI) const; - - /// Return the Depth of a PHI instruction in a trace center block successor. - /// The PHI does not have to be part of the trace. - unsigned getPHIDepth(const MachineInstr *PHI) const; - }; - - /// A trace ensemble is a collection of traces selected using the same - /// strategy, for example 'minimum resource height'. There is one trace for - /// every block in the function. - class Ensemble { - SmallVector BlockInfo; - DenseMap Cycles; - friend class Trace; - - void computeTrace(const MachineBasicBlock*); - void computeDepthResources(const MachineBasicBlock*); - void computeHeightResources(const MachineBasicBlock*); - unsigned computeCrossBlockCriticalPath(const TraceBlockInfo&); - void computeInstrDepths(const MachineBasicBlock*); - void computeInstrHeights(const MachineBasicBlock*); - void addLiveIns(const MachineInstr *DefMI, unsigned DefOp, - ArrayRef Trace); - - protected: - MachineTraceMetrics &MTM; - virtual const MachineBasicBlock *pickTracePred(const MachineBasicBlock*) =0; - virtual const MachineBasicBlock *pickTraceSucc(const MachineBasicBlock*) =0; - explicit Ensemble(MachineTraceMetrics*); - const MachineLoop *getLoopFor(const MachineBasicBlock*) const; - const TraceBlockInfo *getDepthResources(const MachineBasicBlock*) const; - const TraceBlockInfo *getHeightResources(const MachineBasicBlock*) const; - - public: - virtual ~Ensemble(); - virtual const char *getName() const =0; - void print(raw_ostream&) const; - void invalidate(const MachineBasicBlock *MBB); - void verify() const; - - /// Get the trace that passes through MBB. - /// The trace is computed on demand. 
- Trace getTrace(const MachineBasicBlock *MBB); - }; - - /// Strategies for selecting traces. - enum Strategy { - /// Select the trace through a block that has the fewest instructions. - TS_MinInstrCount, - - TS_NumStrategies - }; - - /// Get the trace ensemble representing the given trace selection strategy. - /// The returned Ensemble object is owned by the MachineTraceMetrics analysis, - /// and valid for the lifetime of the analysis pass. - Ensemble *getEnsemble(Strategy); - - /// Invalidate cached information about MBB. This must be called *before* MBB - /// is erased, or the CFG is otherwise changed. - /// - /// This invalidates per-block information about resource usage for MBB only, - /// and it invalidates per-trace information for any trace that passes - /// through MBB. - /// - /// Call Ensemble::getTrace() again to update any trace handles. - void invalidate(const MachineBasicBlock *MBB); - -private: - // One entry per basic block, indexed by block number. - SmallVector BlockInfo; - - // One ensemble per strategy. - Ensemble* Ensembles[TS_NumStrategies]; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, - const MachineTraceMetrics::Trace &Tr) { - Tr.print(OS); - return OS; -} - -inline raw_ostream &operator<<(raw_ostream &OS, - const MachineTraceMetrics::Ensemble &En) { - En.print(OS); - return OS; -} -} // end namespace llvm - -#endif diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 69a3ae84ec99..4b1230029a74 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -23,28 +23,28 @@ // the verifier errors. //===----------------------------------------------------------------------===// -#include "llvm/BasicBlock.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/SetOperations.h" +#include "llvm/ADT/SmallVector.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/MachineInstrBundle.h" -#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBundle.h" #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/BasicBlock.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/SetOperations.h" -#include "llvm/ADT/SmallVector.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace { @@ -307,6 +307,9 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { visitMachineBasicBlockBefore(MFI); // Keep track of the current bundle header. const MachineInstr *CurBundle = 0; + // Do we expect the next instruction to be part of the same bundle? 
+ bool InBundle = false; + for (MachineBasicBlock::const_instr_iterator MBBI = MFI->instr_begin(), MBBE = MFI->instr_end(); MBBI != MBBE; ++MBBI) { if (MBBI->getParent() != MFI) { @@ -314,6 +317,15 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { *OS << "Instruction: " << *MBBI; continue; } + + // Check for consistent bundle flags. + if (InBundle && !MBBI->isBundledWithPred()) + report("Missing BundledPred flag, " + "BundledSucc was set on predecessor", MBBI); + if (!InBundle && MBBI->isBundledWithPred()) + report("BundledPred flag is set, " + "but BundledSucc not set on predecessor", MBBI); + // Is this a bundle header? if (!MBBI->isInsideBundle()) { if (CurBundle) @@ -326,9 +338,14 @@ bool MachineVerifier::runOnMachineFunction(MachineFunction &MF) { for (unsigned I = 0, E = MBBI->getNumOperands(); I != E; ++I) visitMachineOperand(&MBBI->getOperand(I), I); visitMachineInstrAfter(MBBI); + + // Was this the last bundled instruction? + InBundle = MBBI->isBundledWithSucc(); } if (CurBundle) visitMachineBundleAfter(CurBundle); + if (InBundle) + report("BundledSucc flag set on last instruction in block", &MFI->back()); visitMachineBasicBlockAfter(MFI); } visitMachineFunctionAfter(); @@ -580,7 +597,7 @@ MachineVerifier::visitMachineBasicBlockBefore(const MachineBasicBlock *MBB) { ++MBBI; if (MBBI == MF->end()) { report("MBB conditionally falls through out of function!", MBB); - } if (MBB->succ_size() == 1) { + } else if (MBB->succ_size() == 1) { // A conditional branch with only one successor is weird, but allowed. if (&*MBBI != TBB) report("MBB exits via conditional branch/fall-through but only has " diff --git a/lib/CodeGen/OptimizePHIs.cpp b/lib/CodeGen/OptimizePHIs.cpp index 6da313e632af..3982612e8c11 100644 --- a/lib/CodeGen/OptimizePHIs.cpp +++ b/lib/CodeGen/OptimizePHIs.cpp @@ -14,13 +14,13 @@ #define DEBUG_TYPE "phi-opt" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Function.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumPHICycles, "Number of PHI cycles replaced"); diff --git a/lib/CodeGen/PHIElimination.cpp b/lib/CodeGen/PHIElimination.cpp index e6e23da27c1d..5584708eae36 100644 --- a/lib/CodeGen/PHIElimination.cpp +++ b/lib/CodeGen/PHIElimination.cpp @@ -14,23 +14,24 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "phielim" +#include "llvm/CodeGen/Passes.h" #include "PHIEliminationUtils.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Function.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include 
"llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include using namespace llvm; @@ -39,9 +40,16 @@ DisableEdgeSplitting("disable-phi-elim-edge-splitting", cl::init(false), cl::Hidden, cl::desc("Disable critical edge splitting " "during PHI elimination")); +static cl::opt +SplitAllCriticalEdges("phi-elim-split-all-critical-edges", cl::init(false), + cl::Hidden, cl::desc("Split all critical edges during " + "PHI elimination")); + namespace { class PHIElimination : public MachineFunctionPass { MachineRegisterInfo *MRI; // Machine register information + LiveVariables *LV; + LiveIntervals *LIS; public: static char ID; // Pass identification, replacement for typeid @@ -57,8 +65,8 @@ namespace { /// in predecessor basic blocks. /// bool EliminatePHINodes(MachineFunction &MF, MachineBasicBlock &MBB); - void LowerAtomicPHINode(MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt); + void LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt); /// analyzePHINodes - Gather information about the PHI nodes in /// here. In particular, we want to map the number of uses of a virtual @@ -70,7 +78,12 @@ namespace { /// Split critical edges where necessary for good coalescer performance. bool SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI); + MachineLoopInfo *MLI); + + // These functions are temporary abstractions around LiveVariables and + // LiveIntervals, so they can go away when LiveVariables does. + bool isLiveIn(unsigned Reg, MachineBasicBlock *MBB); + bool isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB); typedef std::pair BBVRegPair; typedef DenseMap VRegPHIUse; @@ -87,7 +100,7 @@ namespace { }; } -STATISTIC(NumAtomic, "Number of atomic phis lowered"); +STATISTIC(NumLowered, "Number of phis lowered"); STATISTIC(NumCriticalEdgesSplit, "Number of critical edges split"); STATISTIC(NumReused, "Number of reused lowered phis"); @@ -103,6 +116,8 @@ INITIALIZE_PASS_END(PHIElimination, "phi-node-elimination", void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { AU.addPreserved(); + AU.addPreserved(); + AU.addPreserved(); AU.addPreserved(); AU.addPreserved(); MachineFunctionPass::getAnalysisUsage(AU); @@ -110,19 +125,20 @@ void PHIElimination::getAnalysisUsage(AnalysisUsage &AU) const { bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { MRI = &MF.getRegInfo(); + LV = getAnalysisIfAvailable(); + LIS = getAnalysisIfAvailable(); bool Changed = false; // This pass takes the function out of SSA form. MRI->leaveSSA(); - // Split critical edges to help the coalescer - if (!DisableEdgeSplitting) { - if (LiveVariables *LV = getAnalysisIfAvailable()) { - MachineLoopInfo *MLI = getAnalysisIfAvailable(); - for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) - Changed |= SplitPHIEdges(MF, *I, *LV, MLI); - } + // Split critical edges to help the coalescer. This does not yet support + // updating LiveIntervals, so we disable it. 
+ if (!DisableEdgeSplitting && (LV || LIS)) { + MachineLoopInfo *MLI = getAnalysisIfAvailable(); + for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) + Changed |= SplitPHIEdges(MF, *I, MLI); } // Populate VRegPHIUseCount @@ -137,14 +153,20 @@ bool PHIElimination::runOnMachineFunction(MachineFunction &MF) { E = ImpDefs.end(); I != E; ++I) { MachineInstr *DefMI = *I; unsigned DefReg = DefMI->getOperand(0).getReg(); - if (MRI->use_nodbg_empty(DefReg)) + if (MRI->use_nodbg_empty(DefReg)) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(DefMI); DefMI->eraseFromParent(); + } } // Clean up the lowered PHI instructions. for (LoweredPHIMap::iterator I = LoweredPHIs.begin(), E = LoweredPHIs.end(); - I != E; ++I) + I != E; ++I) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(I->first); MF.DeleteMachineInstr(I->first); + } LoweredPHIs.clear(); ImpDefs.clear(); @@ -166,7 +188,7 @@ bool PHIElimination::EliminatePHINodes(MachineFunction &MF, MachineBasicBlock::iterator AfterPHIsIt = MBB.SkipPHIsAndLabels(MBB.begin()); while (MBB.front().isPHI()) - LowerAtomicPHINode(MBB, AfterPHIsIt); + LowerPHINode(MBB, AfterPHIsIt); return true; } @@ -193,15 +215,11 @@ static bool isSourceDefinedByImplicitDef(const MachineInstr *MPhi, } -/// LowerAtomicPHINode - Lower the PHI node at the top of the specified block, -/// under the assumption that it needs to be lowered in a way that supports -/// atomic execution of PHIs. This lowering method is always correct all of the -/// time. +/// LowerPHINode - Lower the PHI node at the top of the specified block, /// -void PHIElimination::LowerAtomicPHINode( - MachineBasicBlock &MBB, - MachineBasicBlock::iterator AfterPHIsIt) { - ++NumAtomic; +void PHIElimination::LowerPHINode(MachineBasicBlock &MBB, + MachineBasicBlock::iterator AfterPHIsIt) { + ++NumLowered; // Unlink the PHI node from the basic block, but don't delete the PHI yet. MachineInstr *MPhi = MBB.remove(MBB.begin()); @@ -244,7 +262,6 @@ void PHIElimination::LowerAtomicPHINode( } // Update live variable information if there is any. - LiveVariables *LV = getAnalysisIfAvailable(); if (LV) { MachineInstr *PHICopy = prior(AfterPHIsIt); @@ -283,6 +300,48 @@ void PHIElimination::LowerAtomicPHINode( } } + // Update LiveIntervals for the new copy or implicit def. + if (LIS) { + MachineInstr *NewInstr = prior(AfterPHIsIt); + SlotIndex DestCopyIndex = LIS->InsertMachineInstrInMaps(NewInstr); + + SlotIndex MBBStartIndex = LIS->getMBBStartIdx(&MBB); + if (IncomingReg) { + // Add the region from the beginning of MBB to the copy instruction to + // IncomingReg's live interval. + LiveInterval &IncomingLI = LIS->getOrCreateInterval(IncomingReg); + VNInfo *IncomingVNI = IncomingLI.getVNInfoAt(MBBStartIndex); + if (!IncomingVNI) + IncomingVNI = IncomingLI.getNextValue(MBBStartIndex, + LIS->getVNInfoAllocator()); + IncomingLI.addRange(LiveRange(MBBStartIndex, + DestCopyIndex.getRegSlot(), + IncomingVNI)); + } + + LiveInterval &DestLI = LIS->getInterval(DestReg); + assert(DestLI.begin() != DestLI.end() && + "PHIs should have nonempty LiveIntervals."); + if (DestLI.endIndex().isDead()) { + // A dead PHI's live range begins and ends at the start of the MBB, but + // the lowered copy, which will still be dead, needs to begin and end at + // the copy instruction. 
+ VNInfo *OrigDestVNI = DestLI.getVNInfoAt(MBBStartIndex); + assert(OrigDestVNI && "PHI destination should be live at block entry."); + DestLI.removeRange(MBBStartIndex, MBBStartIndex.getDeadSlot()); + DestLI.createDeadDef(DestCopyIndex.getRegSlot(), + LIS->getVNInfoAllocator()); + DestLI.removeValNo(OrigDestVNI); + } else { + // Otherwise, remove the region from the beginning of MBB to the copy + // instruction from DestReg's live interval. + DestLI.removeRange(MBBStartIndex, DestCopyIndex.getRegSlot()); + VNInfo *DestVNI = DestLI.getVNInfoAt(DestCopyIndex.getRegSlot()); + assert(DestVNI && "PHI destination should be live at its definition."); + DestVNI->def = DestCopyIndex.getRegSlot(); + } + } + // Adjust the VRegPHIUseCount map to account for the removal of this PHI node. for (unsigned i = 1; i != MPhi->getNumOperands(); i += 2) --VRegPHIUseCount[BBVRegPair(MPhi->getOperand(i+1).getMBB()->getNumber(), @@ -315,45 +374,44 @@ void PHIElimination::LowerAtomicPHINode( findPHICopyInsertPoint(&opBlock, &MBB, SrcReg); // Insert the copy. + MachineInstr *NewSrcInstr = 0; if (!reusedIncoming && IncomingReg) { if (SrcUndef) { // The source register is undefined, so there is no need for a real // COPY, but we still need to ensure joint dominance by defs. // Insert an IMPLICIT_DEF instruction. - BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::IMPLICIT_DEF), IncomingReg); + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::IMPLICIT_DEF), + IncomingReg); // Clean up the old implicit-def, if there even was one. if (MachineInstr *DefMI = MRI->getVRegDef(SrcReg)) if (DefMI->isImplicitDef()) ImpDefs.insert(DefMI); } else { - BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), - TII->get(TargetOpcode::COPY), IncomingReg) - .addReg(SrcReg, 0, SrcSubReg); + NewSrcInstr = BuildMI(opBlock, InsertPos, MPhi->getDebugLoc(), + TII->get(TargetOpcode::COPY), IncomingReg) + .addReg(SrcReg, 0, SrcSubReg); } } - // Now update live variable information if we have it. Otherwise we're done - if (SrcUndef || !LV) continue; - - // We want to be able to insert a kill of the register if this PHI (aka, the - // copy we just inserted) is the last use of the source value. Live - // variable analysis conservatively handles this by saying that the value is - // live until the end of the block the PHI entry lives in. If the value - // really is dead at the PHI copy, there will be no successor blocks which - // have the value live-in. - - // Also check to see if this register is in use by another PHI node which - // has not yet been eliminated. If so, it will be killed at an appropriate - // point later. - - // Is it used by any PHI instructions in this block? - bool ValueIsUsed = VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]; + // We only need to update the LiveVariables kill of SrcReg if this was the + // last PHI use of SrcReg to be lowered on this CFG edge and it is not live + // out of the predecessor. We can also ignore undef sources. + if (LV && !SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)] && + !LV->isLiveOut(SrcReg, opBlock)) { + // We want to be able to insert a kill of the register if this PHI (aka, + // the copy we just inserted) is the last use of the source value. Live + // variable analysis conservatively handles this by saying that the value + // is live until the end of the block the PHI entry lives in. 
If the value + // really is dead at the PHI copy, there will be no successor blocks which + // have the value live-in. + + // Okay, if we now know that the value is not live out of the block, we + // can add a kill marker in this block saying that it kills the incoming + // value! - // Okay, if we now know that the value is not live out of the block, we can - // add a kill marker in this block saying that it kills the incoming value! - if (!ValueIsUsed && !LV->isLiveOut(SrcReg, opBlock)) { // In our final twist, we have to decide which instruction kills the // register. In most cases this is the copy, however, terminator // instructions at the end of the block may also use the value. In this @@ -394,11 +452,74 @@ void PHIElimination::LowerAtomicPHINode( unsigned opBlockNum = opBlock.getNumber(); LV->getVarInfo(SrcReg).AliveBlocks.reset(opBlockNum); } + + if (LIS) { + if (NewSrcInstr) { + LIS->InsertMachineInstrInMaps(NewSrcInstr); + LIS->addLiveRangeToEndOfBlock(IncomingReg, NewSrcInstr); + } + + if (!SrcUndef && + !VRegPHIUseCount[BBVRegPair(opBlock.getNumber(), SrcReg)]) { + LiveInterval &SrcLI = LIS->getInterval(SrcReg); + + bool isLiveOut = false; + for (MachineBasicBlock::succ_iterator SI = opBlock.succ_begin(), + SE = opBlock.succ_end(); SI != SE; ++SI) { + SlotIndex startIdx = LIS->getMBBStartIdx(*SI); + VNInfo *VNI = SrcLI.getVNInfoAt(startIdx); + + // Definitions by other PHIs are not truly live-in for our purposes. + if (VNI && VNI->def != startIdx) { + isLiveOut = true; + break; + } + } + + if (!isLiveOut) { + MachineBasicBlock::iterator KillInst = opBlock.end(); + MachineBasicBlock::iterator FirstTerm = opBlock.getFirstTerminator(); + for (MachineBasicBlock::iterator Term = FirstTerm; + Term != opBlock.end(); ++Term) { + if (Term->readsRegister(SrcReg)) + KillInst = Term; + } + + if (KillInst == opBlock.end()) { + // No terminator uses the register. + + if (reusedIncoming || !IncomingReg) { + // We may have to rewind a bit if we didn't just insert a copy. + KillInst = FirstTerm; + while (KillInst != opBlock.begin()) { + --KillInst; + if (KillInst->isDebugValue()) + continue; + if (KillInst->readsRegister(SrcReg)) + break; + } + } else { + // We just inserted this copy. + KillInst = prior(InsertPos); + } + } + assert(KillInst->readsRegister(SrcReg) && + "Cannot find kill instruction"); + + SlotIndex LastUseIndex = LIS->getInstructionIndex(KillInst); + SrcLI.removeRange(LastUseIndex.getRegSlot(), + LIS->getMBBEndIdx(&opBlock)); + } + } + } } // Really delete the PHI instruction now, if it is not in the LoweredPHIs map. - if (reusedIncoming || !IncomingReg) + if (reusedIncoming || !IncomingReg) { + if (LIS) + LIS->RemoveMachineInstrFromMaps(MPhi); MF.DeleteMachineInstr(MPhi); + } } /// analyzePHINodes - Gather information about the PHI nodes in here. In @@ -418,7 +539,6 @@ void PHIElimination::analyzePHINodes(const MachineFunction& MF) { bool PHIElimination::SplitPHIEdges(MachineFunction &MF, MachineBasicBlock &MBB, - LiveVariables &LV, MachineLoopInfo *MLI) { if (MBB.empty() || !MBB.front().isPHI() || MBB.isLandingPad()) return false; // Quick exit for basic blocks without PHIs. @@ -438,10 +558,10 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // Avoid splitting backedges of loops. It would introduce small // out-of-line blocks into the loop which is very bad for code placement. - if (PreMBB == &MBB) + if (PreMBB == &MBB && !SplitAllCriticalEdges) continue; const MachineLoop *PreLoop = MLI ? 
MLI->getLoopFor(PreMBB) : 0; - if (IsLoopHeader && PreLoop == CurLoop) + if (IsLoopHeader && PreLoop == CurLoop && !SplitAllCriticalEdges) continue; // LV doesn't consider a phi use live-out, so isLiveOut only returns true @@ -450,7 +570,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // there is a risk it may not be coalesced away. // // If the copy would be a kill, there is no need to split the edge. - if (!LV.isLiveOut(Reg, *PreMBB)) + if (!isLiveOutPastPHIs(Reg, PreMBB) && !SplitAllCriticalEdges) continue; DEBUG(dbgs() << PrintReg(Reg) << " live-out before critical edge BB#" @@ -465,7 +585,7 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, // is likely to be left after coalescing. If we are looking at a loop // exiting edge, split it so we won't insert code in the loop, otherwise // don't bother. - bool ShouldSplit = !LV.isLiveIn(Reg, MBB); + bool ShouldSplit = !isLiveIn(Reg, &MBB) || SplitAllCriticalEdges; // Check for a loop exiting edge. if (!ShouldSplit && CurLoop != PreLoop) { @@ -492,3 +612,33 @@ bool PHIElimination::SplitPHIEdges(MachineFunction &MF, } return Changed; } + +bool PHIElimination::isLiveIn(unsigned Reg, MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveIn() requires either LiveVariables or LiveIntervals"); + if (LIS) + return LIS->isLiveInToMBB(LIS->getInterval(Reg), MBB); + else + return LV->isLiveIn(Reg, *MBB); +} + +bool PHIElimination::isLiveOutPastPHIs(unsigned Reg, MachineBasicBlock *MBB) { + assert((LV || LIS) && + "isLiveOutPastPHIs() requires either LiveVariables or LiveIntervals"); + // LiveVariables considers uses in PHIs to be in the predecessor basic block, + // so that a register used only in a PHI is not live out of the block. In + // contrast, LiveIntervals considers uses in PHIs to be on the edge rather than + // in the predecessor basic block, so that a register used only in a PHI is live + // out of the block. 
+ if (LIS) { + const LiveInterval &LI = LIS->getInterval(Reg); + for (MachineBasicBlock::succ_iterator SI = MBB->succ_begin(), + SE = MBB->succ_end(); SI != SE; ++SI) { + if (LI.liveAt(LIS->getMBBStartIdx(*SI))) + return true; + } + return false; + } else { + return LV->isLiveOut(Reg, *MBB); + } +} diff --git a/lib/CodeGen/PHIEliminationUtils.cpp b/lib/CodeGen/PHIEliminationUtils.cpp index 10bfdcce6769..e1b56e962fa9 100644 --- a/lib/CodeGen/PHIEliminationUtils.cpp +++ b/lib/CodeGen/PHIEliminationUtils.cpp @@ -8,10 +8,10 @@ //===----------------------------------------------------------------------===// #include "PHIEliminationUtils.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/ADT/SmallPtrSet.h" using namespace llvm; // findCopyInsertPoint - Find a safe place in MBB to insert a copy from SrcReg diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 4ea21d4ff7bd..1af65c88abeb 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -12,21 +12,21 @@ // //===---------------------------------------------------------------------===// +#include "llvm/CodeGen/Passes.h" #include "llvm/Analysis/Passes.h" #include "llvm/Analysis/Verifier.h" -#include "llvm/Transforms/Scalar.h" -#include "llvm/PassManager.h" +#include "llvm/Assembly/PrintModulePass.h" #include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/PassManager.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/Transforms/Scalar.h" using namespace llvm; @@ -39,12 +39,9 @@ static cl::opt DisableTailDuplicate("disable-tail-duplicate", cl::Hidden, static cl::opt DisableEarlyTailDup("disable-early-taildup", cl::Hidden, cl::desc("Disable pre-register allocation tail duplication")); static cl::opt DisableBlockPlacement("disable-block-placement", - cl::Hidden, cl::desc("Disable the probability-driven block placement, and " - "re-enable the old code placement pass")); + cl::Hidden, cl::desc("Disable probability-driven block placement")); static cl::opt EnableBlockPlacementStats("enable-block-placement-stats", cl::Hidden, cl::desc("Collect probability-driven block placement stats")); -static cl::opt DisableCodePlace("disable-code-place", cl::Hidden, - cl::desc("Disable code placement")); static cl::opt DisableSSC("disable-ssc", cl::Hidden, cl::desc("Disable Stack Slot Coloring")); static cl::opt DisableMachineDCE("disable-machine-dce", cl::Hidden, @@ -88,7 +85,7 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, cl::desc("Print machine instrs"), cl::value_desc("pass-name"), cl::init("option-unspecified")); -// Experimental option to run live inteerval analysis early. +// Experimental option to run live interval analysis early. 
static cl::opt EarlyLiveIntervals("early-live-intervals", cl::Hidden, cl::desc("Run live interval analysis earlier in the pipeline")); @@ -149,10 +146,7 @@ static AnalysisID overridePass(AnalysisID StandardID, AnalysisID TargetID) { return applyDisable(TargetID, DisableEarlyTailDup); if (StandardID == &MachineBlockPlacementID) - return applyDisable(TargetID, DisableCodePlace); - - if (StandardID == &CodePlacementOptID) - return applyDisable(TargetID, DisableCodePlace); + return applyDisable(TargetID, DisableBlockPlacement); if (StandardID == &StackSlotColoringID) return applyDisable(TargetID, DisableSSC); @@ -237,11 +231,10 @@ TargetPassConfig::TargetPassConfig(TargetMachine *tm, PassManagerBase &pm) substitutePass(&EarlyTailDuplicateID, &TailDuplicateID); substitutePass(&PostRAMachineLICMID, &MachineLICMID); - // Disable early if-conversion. Targets that are ready can enable it. - disablePass(&EarlyIfConverterID); - // Temporarily disable experimental passes. - substitutePass(&MachineSchedulerID, 0); + const TargetSubtargetInfo &ST = TM->getSubtarget(); + if (!ST.enableMachineScheduler()) + disablePass(&MachineSchedulerID); } /// Insert InsertedPassID pass after TargetPassID. @@ -359,7 +352,7 @@ void TargetPassConfig::addIRPasses() { // Run loop strength reduction before anything else. if (getOptLevel() != CodeGenOpt::None && !DisableLSR) { - addPass(createLoopStrengthReducePass(getTargetLowering())); + addPass(createLoopStrengthReducePass()); if (PrintLSR) addPass(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); } @@ -397,12 +390,16 @@ void TargetPassConfig::addPassesToHandleExceptions() { } } -/// Add common passes that perform LLVM IR to IR transforms in preparation for -/// instruction selection. -void TargetPassConfig::addISelPrepare() { +/// Add pass to prepare the LLVM IR for code generation. This should be done +/// before exception handling preparation passes. +void TargetPassConfig::addCodeGenPrepare() { if (getOptLevel() != CodeGenOpt::None && !DisableCGP) addPass(createCodeGenPreparePass(getTargetLowering())); +} +/// Add common passes that perform LLVM IR to IR transforms in preparation for +/// instruction selection. +void TargetPassConfig::addISelPrepare() { addPass(createStackProtectorPass(getTargetLowering())); addPreISel(); @@ -462,8 +459,7 @@ void TargetPassConfig::addMachinePasses() { // Add passes that optimize machine instructions in SSA form. if (getOptLevel() != CodeGenOpt::None) { addMachineSSAOptimization(); - } - else { + } else { // If the target requests it, assign local variables to stack slots relative // to one another and simplify frame index references where possible. addPass(&LocalStackSlotAllocationID); @@ -507,9 +503,10 @@ void TargetPassConfig::addMachinePasses() { } // GC - addPass(&GCMachineCodeAnalysisID); - if (PrintGCInfo) - addPass(createGCInfoPrinter(dbgs())); + if (addGCPasses()) { + if (PrintGCInfo) + addPass(createGCInfoPrinter(dbgs())); + } // Basic block placement. if (getOptLevel() != CodeGenOpt::None) @@ -544,7 +541,12 @@ void TargetPassConfig::addMachineSSAOptimization() { addPass(&DeadMachineInstructionElimID); printAndVerify("After codegen DCE pass"); - addPass(&EarlyIfConverterID); + // Allow targets to insert passes that improve instruction level parallelism, + // like if-conversion. Such passes will typically need dominator trees and + // loop info, just like LICM and CSE below. 
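The addILPOpts() hook mentioned in the comment above is the target opt-in point for such passes. A sketch of how a target might use it, assuming the hook is a virtual bool member of TargetPassConfig as the call site below suggests; the MyTarget names are hypothetical, only addPass() and EarlyIfConverterID come from this patch:

// Hypothetical target configuration; everything except addPass() and
// EarlyIfConverterID is made up for illustration.
class MyTargetPassConfig : public TargetPassConfig {
public:
  MyTargetPassConfig(TargetMachine *TM, PassManagerBase &PM)
    : TargetPassConfig(TM, PM) {}

  // Called from addMachineSSAOptimization(); returning true lets the
  // generic code print and verify after the ILP passes.
  virtual bool addILPOpts() {
    addPass(&EarlyIfConverterID);  // opt in to early if-conversion
    return true;
  }
};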
+ if (addILPOpts()) + printAndVerify("After ILP optimizations"); + addPass(&MachineLICMID); addPass(&MachineCSEID); addPass(&MachineSinkingID); @@ -726,18 +728,15 @@ void TargetPassConfig::addMachineLateOptimization() { printAndVerify("After copy propagation pass"); } +/// Add standard GC passes. +bool TargetPassConfig::addGCPasses() { + addPass(&GCMachineCodeAnalysisID); + return true; +} + /// Add standard basic block placement passes. void TargetPassConfig::addBlockPlacement() { - AnalysisID PassID = 0; - if (!DisableBlockPlacement) { - // MachineBlockPlacement is a new pass which subsumes the functionality of - // CodPlacementOpt. The old code placement pass can be restored by - // disabling block placement, but eventually it will be removed. - PassID = addPass(&MachineBlockPlacementID); - } else { - PassID = addPass(&CodePlacementOptID); - } - if (PassID) { + if (addPass(&MachineBlockPlacementID)) { // Run a separate pass to collect block placement statistics. if (EnableBlockPlacementStats) addPass(&MachineBlockPlacementStatsID); diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index a795ac8448f5..a7439b5129b5 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -49,20 +49,26 @@ // v1 = bitcast v0 // = v0 // +// - Optimize Loads: +// +// Loads that can be folded into a later instruction. A load is foldable +// if it loads to virtual registers and the virtual register defined has +// a single use. //===----------------------------------------------------------------------===// #define DEBUG_TYPE "peephole-opt" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; // Optimize Extensions @@ -473,6 +479,9 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, } bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); + DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); + if (DisablePeephole) return false; @@ -547,6 +556,8 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { FoldAsLoadDefReg, DefMI); if (FoldMI) { // Update LocalMIs since we replaced MI with FoldMI and deleted DefMI. 
+ DEBUG(dbgs() << "Replacing: " << *MI); + DEBUG(dbgs() << " With: " << *FoldMI); LocalMIs.erase(MI); LocalMIs.erase(DefMI); LocalMIs.insert(FoldMI); diff --git a/lib/CodeGen/PostRASchedulerList.cpp b/lib/CodeGen/PostRASchedulerList.cpp index d57bc7362de9..53fe273a1032 100644 --- a/lib/CodeGen/PostRASchedulerList.cpp +++ b/lib/CodeGen/PostRASchedulerList.cpp @@ -19,32 +19,33 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "post-RA-sched" -#include "AntiDepBreaker.h" +#include "llvm/CodeGen/Passes.h" #include "AggressiveAntiDepBreaker.h" +#include "AntiDepBreaker.h" #include "CriticalAntiDepBreaker.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/ScheduleDAGInstrs.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; STATISTIC(NumNoops, "Number of noops inserted"); @@ -111,9 +112,6 @@ namespace { /// added to the AvailableQueue. std::vector PendingQueue; - /// Topo - A topological ordering for SUnits. - ScheduleDAGTopologicalSort Topo; - /// HazardRec - The hazard recognizer to use. ScheduleHazardRecognizer *HazardRec; @@ -198,7 +196,7 @@ SchedulePostRATDList::SchedulePostRATDList( AliasAnalysis *AA, const RegisterClassInfo &RCI, TargetSubtargetInfo::AntiDepBreakMode AntiDepMode, SmallVectorImpl &CriticalPathRCs) - : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), Topo(SUnits), AA(AA), + : ScheduleDAGInstrs(MF, MLI, MDT, /*IsPostRA=*/true), AA(AA), LiveRegs(TRI->getNumRegs()) { const TargetMachine &TM = MF.getTarget(); @@ -420,11 +418,11 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { // Start with no live registers. LiveRegs.reset(); - // Determine the live-out physregs for this block. - if (!BB->empty() && BB->back().isReturn()) { - // In a return block, examine the function live-out regs. - for (MachineRegisterInfo::liveout_iterator I = MRI.liveout_begin(), - E = MRI.liveout_end(); I != E; ++I) { + // Examine the live-in regs of all successors. + for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), + E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; LiveRegs.set(Reg); // Repeat, for all subregs. 
@@ -432,20 +430,6 @@ void SchedulePostRATDList::StartBlockForKills(MachineBasicBlock *BB) { LiveRegs.set(*SubRegs); } } - else { - // In a non-return block, examine the live-in regs of all successors. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) { - for (MachineBasicBlock::livein_iterator I = (*SI)->livein_begin(), - E = (*SI)->livein_end(); I != E; ++I) { - unsigned Reg = *I; - LiveRegs.set(Reg); - // Repeat, for all subregs. - for (MCSubRegIterator SubRegs(Reg, TRI); SubRegs.isValid(); ++SubRegs) - LiveRegs.set(*SubRegs); - } - } - } } bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, @@ -467,13 +451,10 @@ bool SchedulePostRATDList::ToggleKillFlag(MachineInstr *MI, MO.setIsKill(false); bool AllDead = true; const unsigned SuperReg = MO.getReg(); + MachineInstrBuilder MIB(MF, MI); for (MCSubRegIterator SubRegs(SuperReg, TRI); SubRegs.isValid(); ++SubRegs) { if (LiveRegs.test(*SubRegs)) { - MI->addOperand(MachineOperand::CreateReg(*SubRegs, - true /*IsDef*/, - true /*IsImp*/, - false /*IsKill*/, - false /*IsDead*/)); + MIB.addReg(*SubRegs, RegState::ImplicitDefine); AllDead = false; } } @@ -580,10 +561,14 @@ void SchedulePostRATDList::FixupKills(MachineBasicBlock *MBB) { //===----------------------------------------------------------------------===// /// ReleaseSucc - Decrement the NumPredsLeft count of a successor. Add it to -/// the PendingQueue if the count reaches zero. Also update its cycle bound. +/// the PendingQueue if the count reaches zero. void SchedulePostRATDList::ReleaseSucc(SUnit *SU, SDep *SuccEdge) { SUnit *SuccSU = SuccEdge->getSUnit(); + if (SuccEdge->isWeak()) { + --SuccSU->WeakPredsLeft; + return; + } #ifndef NDEBUG if (SuccSU->NumPredsLeft == 0) { dbgs() << "*** Scheduling failed! ***\n"; @@ -653,8 +638,7 @@ void SchedulePostRATDList::ListScheduleTopDown() { // Add all leaves to Available queue. for (unsigned i = 0, e = SUnits.size(); i != e; ++i) { // It is available if it has no predecessors. 
- bool available = SUnits[i].Preds.empty(); - if (available) { + if (!SUnits[i].NumPredsLeft && !SUnits[i].isAvailable) { AvailableQueue.push(&SUnits[i]); SUnits[i].isAvailable = true; } diff --git a/lib/CodeGen/PrologEpilogInserter.cpp b/lib/CodeGen/PrologEpilogInserter.cpp index 77554d691c26..e5872df731a0 100644 --- a/lib/CodeGen/PrologEpilogInserter.cpp +++ b/lib/CodeGen/PrologEpilogInserter.cpp @@ -21,25 +21,24 @@ #define DEBUG_TYPE "pei" #include "PrologEpilogInserter.h" -#include "llvm/InlineAsm.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -56,7 +55,6 @@ INITIALIZE_PASS_END(PEI, "prologepilog", "Prologue/Epilogue Insertion & Frame Finalization", false, false) -STATISTIC(NumVirtualFrameRegs, "Number of virtual frame regs encountered"); STATISTIC(NumScavengedRegs, "Number of frame index regs scavenged"); STATISTIC(NumBytesStackSpace, "Number of bytes used for stack in all functions"); @@ -96,12 +94,13 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { placeCSRSpillsAndRestores(Fn); // Add the code to save and restore the callee saved registers - if (!F->getFnAttributes().hasAttribute(Attributes::Naked)) + if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) insertCSRSpillsAndRestores(Fn); // Allow the target machine to make final modifications to the function // before the frame layout is finalized. - TFI->processFunctionBeforeFrameFinalized(Fn); + TFI->processFunctionBeforeFrameFinalized(Fn, RS); // Calculate actual frame offsets for all abstract stack objects... calculateFrameObjectOffsets(Fn); @@ -111,7 +110,8 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { // called functions. Because of this, calculateCalleeSavedRegisters() // must be called before this function in order to set the AdjustsStack // and MaxCallFrameSize variables. 
- if (!F->getFnAttributes().hasAttribute(Attributes::Naked)) + if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) insertPrologEpilogCode(Fn); // Replace all MO_FrameIndex operands with physical register references @@ -133,24 +133,10 @@ bool PEI::runOnMachineFunction(MachineFunction &Fn) { return true; } -#if 0 -void PEI::getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesCFG(); - if (ShrinkWrapping || ShrinkWrapFunc != "") { - AU.addRequired(); - AU.addRequired(); - } - AU.addPreserved(); - AU.addPreserved(); - MachineFunctionPass::getAnalysisUsage(AU); -} -#endif - /// calculateCallsInformation - Calculate the MaxCallFrameSize and AdjustsStack /// variables for the function's frame information and eliminate call frame /// pseudo instructions. void PEI::calculateCallsInformation(MachineFunction &Fn) { - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); const TargetInstrInfo &TII = *Fn.getTarget().getInstrInfo(); const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); MachineFrameInfo *MFI = Fn.getFrameInfo(); @@ -197,20 +183,20 @@ void PEI::calculateCallsInformation(MachineFunction &Fn) { // here. The sub/add sp instruction pairs are still inserted, but we don't // need to track the SP adjustment for frame index elimination. if (TFI->canSimplifyCallFramePseudos(Fn)) - RegInfo->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); + TFI->eliminateCallFramePseudoInstr(Fn, *I->getParent(), I); } } /// calculateCalleeSavedRegisters - Scan the function for modified callee saved /// registers. -void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { - const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); - const TargetFrameLowering *TFI = Fn.getTarget().getFrameLowering(); - MachineFrameInfo *MFI = Fn.getFrameInfo(); +void PEI::calculateCalleeSavedRegisters(MachineFunction &F) { + const TargetRegisterInfo *RegInfo = F.getTarget().getRegisterInfo(); + const TargetFrameLowering *TFI = F.getTarget().getFrameLowering(); + MachineFrameInfo *MFI = F.getFrameInfo(); // Get the callee saved register list... - const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&Fn); + const uint16_t *CSRegs = RegInfo->getCalleeSavedRegs(&F); // These are used to keep track the callee-save area. Initialize them. MinCSFrameIndex = INT_MAX; @@ -221,13 +207,14 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { return; // In Naked functions we aren't going to save any registers. - if (Fn.getFunction()->getFnAttributes().hasAttribute(Attributes::Naked)) + if (F.getFunction()->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::Naked)) return; std::vector CSI; for (unsigned i = 0; CSRegs[i]; ++i) { unsigned Reg = CSRegs[i]; - if (Fn.getRegInfo().isPhysRegUsed(Reg)) { + if (F.getRegInfo().isPhysRegUsed(Reg)) { // If the reg is modified, save it! 
CSI.push_back(CalleeSavedInfo(Reg)); } @@ -248,7 +235,7 @@ void PEI::calculateCalleeSavedRegisters(MachineFunction &Fn) { const TargetRegisterClass *RC = RegInfo->getMinimalPhysRegClass(Reg); int FrameIdx; - if (RegInfo->hasReservedSpillSlot(Fn, Reg, FrameIdx)) { + if (RegInfo->hasReservedSpillSlot(F, Reg, FrameIdx)) { I->setFrameIdx(FrameIdx); continue; } @@ -560,9 +547,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { const TargetRegisterInfo *RegInfo = Fn.getTarget().getRegisterInfo(); if (RS && TFI.hasFP(Fn) && RegInfo->useFPForScavengingIndex(Fn) && !RegInfo->needsStackRealignment(Fn)) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + SmallVector SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } // FIXME: Once this is working, then enable flag will change to a target @@ -605,7 +594,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; - if (RS && (int)i == RS->getScavengingFrameIndex()) + if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; @@ -627,7 +616,7 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { continue; if (i >= MinCSFrameIndex && i <= MaxCSFrameIndex) continue; - if (RS && (int)i == RS->getScavengingFrameIndex()) + if (RS && RS->isScavengingFrameIndex((int)i)) continue; if (MFI->isDeadObjectIndex(i)) continue; @@ -643,9 +632,11 @@ void PEI::calculateFrameObjectOffsets(MachineFunction &Fn) { // stack pointer. if (RS && (!TFI.hasFP(Fn) || RegInfo->needsStackRealignment(Fn) || !RegInfo->useFPForScavengingIndex(Fn))) { - int SFI = RS->getScavengingFrameIndex(); - if (SFI >= 0) - AdjustStackOffset(MFI, SFI, StackGrowsDown, Offset, MaxAlign); + SmallVector SFIs; + RS->getScavengingFrameIndices(SFIs); + for (SmallVector::iterator I = SFIs.begin(), + IE = SFIs.end(); I != IE; ++I) + AdjustStackOffset(MFI, *I, StackGrowsDown, Offset, MaxAlign); } if (!TFI.targetHandlesStackFrameRounding()) { @@ -703,6 +694,14 @@ void PEI::insertPrologEpilogCode(MachineFunction &Fn) { // space in small chunks instead of one large contiguous block. if (Fn.getTarget().Options.EnableSegmentedStacks) TFI.adjustForSegmentedStacks(Fn); + + // Emit additional code that is required to explicitly handle the stack in + // HiPE native code (if needed) when loaded in the Erlang/OTP runtime. The + // approach is rather similar to that of Segmented Stacks, but it uses a + // different conditional check and another BIF for allocating more stack + // space. + if (Fn.getFunction()->getCallingConv() == CallingConv::HiPE) + TFI.adjustForHiPEPrologue(Fn); } /// replaceFrameIndices - Replace all MO_FrameIndex operands with physical @@ -749,7 +748,7 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { MachineBasicBlock::iterator PrevI = BB->end(); if (I != BB->begin()) PrevI = prior(I); - TRI.eliminateCallFramePseudoInstr(Fn, *BB, I); + TFI->eliminateCallFramePseudoInstr(Fn, *BB, I); // Visit the instructions created by eliminateCallFramePseudoInstr(). if (PrevI == BB->end()) @@ -761,34 +760,36 @@ void PEI::replaceFrameIndices(MachineFunction &Fn) { MachineInstr *MI = I; bool DoIncr = true; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) - if (MI->getOperand(i).isFI()) { - // Some instructions (e.g. 
inline asm instructions) can have - // multiple frame indices and/or cause eliminateFrameIndex - // to insert more than one instruction. We need the register - // scavenger to go through all of these instructions so that - // it can update its register information. We keep the - // iterator at the point before insertion so that we can - // revisit them in full. - bool AtBeginning = (I == BB->begin()); - if (!AtBeginning) --I; - - // If this instruction has a FrameIndex operand, we need to - // use that target machine register info object to eliminate - // it. - TRI.eliminateFrameIndex(MI, SPAdj, - FrameIndexVirtualScavenging ? NULL : RS); - - // Reset the iterator if we were at the beginning of the BB. - if (AtBeginning) { - I = BB->begin(); - DoIncr = false; - } + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (!MI->getOperand(i).isFI()) + continue; - MI = 0; - break; + // Some instructions (e.g. inline asm instructions) can have + // multiple frame indices and/or cause eliminateFrameIndex + // to insert more than one instruction. We need the register + // scavenger to go through all of these instructions so that + // it can update its register information. We keep the + // iterator at the point before insertion so that we can + // revisit them in full. + bool AtBeginning = (I == BB->begin()); + if (!AtBeginning) --I; + + // If this instruction has a FrameIndex operand, we need to + // use that target machine register info object to eliminate + // it. + TRI.eliminateFrameIndex(MI, SPAdj, i, + FrameIndexVirtualScavenging ? NULL : RS); + + // Reset the iterator if we were at the beginning of the BB. + if (AtBeginning) { + I = BB->begin(); + DoIncr = false; } + MI = 0; + break; + } + if (DoIncr && I != BB->end()) ++I; // Update register states. @@ -818,14 +819,22 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { E = Fn.end(); BB != E; ++BB) { RS->enterBasicBlock(BB); - unsigned VirtReg = 0; - unsigned ScratchReg = 0; int SPAdj = 0; // The instruction stream may change in the loop, so check BB->end() // directly. for (MachineBasicBlock::iterator I = BB->begin(); I != BB->end(); ) { MachineInstr *MI = I; + MachineBasicBlock::iterator J = llvm::next(I); + MachineBasicBlock::iterator P = I == BB->begin() ? + MachineBasicBlock::iterator(NULL) : llvm::prior(I); + + // RS should process this instruction before we might scavenge at this + // location. This is because we might be replacing a virtual register + // defined by this instruction, and if so, registers killed by this + // instruction are available, and defined registers are not. + RS->forward(I); + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { if (MI->getOperand(i).isReg()) { MachineOperand &MO = MI->getOperand(i); @@ -835,29 +844,49 @@ void PEI::scavengeFrameVirtualRegs(MachineFunction &Fn) { if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; - ++NumVirtualFrameRegs; - - // Have we already allocated a scratch register for this virtual? - if (Reg != VirtReg) { - // When we first encounter a new virtual register, it - // must be a definition. - assert(MI->getOperand(i).isDef() && - "frame index virtual missing def!"); - // Scavenge a new scratch register - VirtReg = Reg; - const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); - ScratchReg = RS->scavengeRegister(RC, I, SPAdj); - ++NumScavengedRegs; - } + // When we first encounter a new virtual register, it + // must be a definition. 
+ assert(MI->getOperand(i).isDef() && + "frame index virtual missing def!"); + // Scavenge a new scratch register + const TargetRegisterClass *RC = Fn.getRegInfo().getRegClass(Reg); + unsigned ScratchReg = RS->scavengeRegister(RC, J, SPAdj); + + ++NumScavengedRegs; + // Replace this reference to the virtual register with the // scratch register. assert (ScratchReg && "Missing scratch register!"); - MI->getOperand(i).setReg(ScratchReg); + Fn.getRegInfo().replaceRegWith(Reg, ScratchReg); + // Because this instruction was processed by the RS before this + // register was allocated, make sure that the RS now records the + // register as being used. + RS->setUsed(ScratchReg); } } - RS->forward(I); - ++I; + + // If the scavenger needed to use one of its spill slots, the + // spill code will have been inserted in between I and J. This is a + // problem because we need the spill code before I: Move I to just + // prior to J. + if (I != llvm::prior(J)) { + BB->splice(J, BB, I); + + // Before we move I, we need to prepare the RS to visit I again. + // Specifically, RS will assert if it sees uses of registers that + // it believes are undefined. Because we have already processed + // register kills in I, when it visits I again, it will believe that + // those registers are undefined. To avoid this situation, unprocess + // the instruction I. + assert(RS->getCurrentPosition() == I && + "The register scavenger has an unexpected position"); + I = P; + RS->unprocess(P); + + // RS->skipTo(I == BB->begin() ? NULL : llvm::prior(I)); + } else + ++I; } } } diff --git a/lib/CodeGen/PrologEpilogInserter.h b/lib/CodeGen/PrologEpilogInserter.h index 0d140a9bb481..87fff9afb309 100644 --- a/lib/CodeGen/PrologEpilogInserter.h +++ b/lib/CodeGen/PrologEpilogInserter.h @@ -22,11 +22,11 @@ #ifndef LLVM_CODEGEN_PEI_H #define LLVM_CODEGEN_PEI_H -#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/ADT/SparseBitVector.h" -#include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/Target/TargetRegisterInfo.h" namespace llvm { diff --git a/lib/CodeGen/PseudoSourceValue.cpp b/lib/CodeGen/PseudoSourceValue.cpp index 49599b3ab980..85649111d7f1 100644 --- a/lib/CodeGen/PseudoSourceValue.cpp +++ b/lib/CodeGen/PseudoSourceValue.cpp @@ -11,14 +11,14 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; diff --git a/lib/CodeGen/RegAllocBase.cpp b/lib/CodeGen/RegAllocBase.cpp index 993dbc71ded3..c0355903574f 100644 --- a/lib/CodeGen/RegAllocBase.cpp +++ b/lib/CodeGen/RegAllocBase.cpp @@ -14,14 +14,14 @@ #define DEBUG_TYPE "regalloc" #include "RegAllocBase.h" -#include "LiveRegMatrix.h" #include "Spiller.h" -#include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/MachineInstr.h" #include 
"llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #ifndef NDEBUG diff --git a/lib/CodeGen/RegAllocBase.h b/lib/CodeGen/RegAllocBase.h index db0c8e13d30a..064e40f06b7b 100644 --- a/lib/CodeGen/RegAllocBase.h +++ b/lib/CodeGen/RegAllocBase.h @@ -37,9 +37,9 @@ #ifndef LLVM_CODEGEN_REGALLOCBASE #define LLVM_CODEGEN_REGALLOCBASE -#include "LiveIntervalUnion.h" -#include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/ADT/OwningPtr.h" +#include "llvm/CodeGen/LiveIntervalUnion.h" +#include "llvm/CodeGen/RegisterClassInfo.h" namespace llvm { diff --git a/lib/CodeGen/RegAllocBasic.cpp b/lib/CodeGen/RegAllocBasic.cpp index 8a49609552ad..0b6dc68cdf09 100644 --- a/lib/CodeGen/RegAllocBasic.cpp +++ b/lib/CodeGen/RegAllocBasic.cpp @@ -13,30 +13,28 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" -#include "RegAllocBase.h" #include "LiveDebugVariables.h" +#include "RegAllocBase.h" #include "Spiller.h" -#include "VirtRegMap.h" -#include "LiveRegMatrix.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/PassAnalysisSupport.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" - +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include #include diff --git a/lib/CodeGen/RegAllocFast.cpp b/lib/CodeGen/RegAllocFast.cpp index 88922169b306..bb9c05c5f42d 100644 --- a/lib/CodeGen/RegAllocFast.cpp +++ b/lib/CodeGen/RegAllocFast.cpp @@ -13,28 +13,28 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "llvm/BasicBlock.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/IndexedMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/SparseSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/IR/BasicBlock.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/IndexedMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SparseSet.h" -#include 
"llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" #include using namespace llvm; @@ -113,12 +113,27 @@ namespace { // PhysRegState - One of the RegState enums, or a virtreg. std::vector PhysRegState; + // Set of register units. typedef SparseSet UsedInInstrSet; - // UsedInInstr - Set of physregs that are used in the current instruction, - // and so cannot be allocated. + // Set of register units that are used in the current instruction, and so + // cannot be allocated. UsedInInstrSet UsedInInstr; + // Mark a physreg as used in this instruction. + void markRegUsedInInstr(unsigned PhysReg) { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + UsedInInstr.insert(*Units); + } + + // Check if a physreg or any of its aliases are used in this instruction. + bool isRegUsedInInstr(unsigned PhysReg) const { + for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) + if (UsedInInstr.count(*Units)) + return true; + return false; + } + // SkippedInstrs - Descriptors of instructions whose clobber list was // ignored because all registers were spilled. It is still necessary to // mark all the clobbered registers as used by the function. @@ -177,7 +192,6 @@ namespace { unsigned VirtReg, unsigned Hint); void spillAll(MachineBasicBlock::iterator MI); bool setPhysReg(MachineInstr *MI, unsigned OpNum, unsigned PhysReg); - void addRetOperands(MachineBasicBlock *MBB); }; char RAFast::ID = 0; } @@ -334,7 +348,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { unsigned PhysReg = MO.getReg(); assert(TargetRegisterInfo::isPhysicalRegister(PhysReg) && "Bad usePhysReg operand"); - + markRegUsedInInstr(PhysReg); switch (PhysRegState[PhysReg]) { case regDisabled: break; @@ -342,7 +356,6 @@ void RAFast::usePhysReg(MachineOperand &MO) { PhysRegState[PhysReg] = regFree; // Fall through case regFree: - UsedInInstr.insert(PhysReg); MO.setIsKill(); return; default: @@ -362,13 +375,11 @@ void RAFast::usePhysReg(MachineOperand &MO) { "Instruction is not using a subregister of a reserved register"); // Leave the superregister in the working set. PhysRegState[Alias] = regFree; - UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; case regFree: if (TRI->isSuperRegister(PhysReg, Alias)) { // Leave the superregister in the working set. - UsedInInstr.insert(Alias); MO.getParent()->addRegisterKilled(Alias, TRI, true); return; } @@ -382,7 +393,6 @@ void RAFast::usePhysReg(MachineOperand &MO) { // All aliases are disabled, bring register into working set. PhysRegState[PhysReg] = regFree; - UsedInInstr.insert(PhysReg); MO.setIsKill(); } @@ -391,7 +401,7 @@ void RAFast::usePhysReg(MachineOperand &MO) { /// reserved instead of allocated. void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, RegState NewState) { - UsedInInstr.insert(PhysReg); + markRegUsedInInstr(PhysReg); switch (unsigned VirtReg = PhysRegState[PhysReg]) { case regDisabled: break; @@ -431,7 +441,7 @@ void RAFast::definePhysReg(MachineInstr *MI, unsigned PhysReg, // can be allocated directly. // Returns spillImpossible when PhysReg or an alias can't be spilled. 
unsigned RAFast::calcSpillCost(unsigned PhysReg) const { - if (UsedInInstr.count(PhysReg)) { + if (isRegUsedInInstr(PhysReg)) { DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is already used in instr.\n"); return spillImpossible; } @@ -456,8 +466,6 @@ unsigned RAFast::calcSpillCost(unsigned PhysReg) const { unsigned Cost = 0; for (MCRegAliasIterator AI(PhysReg, TRI, false); AI.isValid(); ++AI) { unsigned Alias = *AI; - if (UsedInInstr.count(Alias)) - return spillImpossible; switch (unsigned VirtReg = PhysRegState[Alias]) { case regDisabled: break; @@ -527,12 +535,12 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, } } - ArrayRef AO = RegClassInfo.getOrder(RC); + ArrayRef AO = RegClassInfo.getOrder(RC); // First try to find a completely free register. - for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { + for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ unsigned PhysReg = *I; - if (PhysRegState[PhysReg] == regFree && !UsedInInstr.count(PhysReg)) { + if (PhysRegState[PhysReg] == regFree && !isRegUsedInInstr(PhysReg)) { assignVirtToPhysReg(*LRI, PhysReg); return LRI; } @@ -542,7 +550,7 @@ RAFast::LiveRegMap::iterator RAFast::allocVirtReg(MachineInstr *MI, << RC->getName() << "\n"); unsigned BestReg = 0, BestCost = spillImpossible; - for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I) { + for (ArrayRef::iterator I = AO.begin(), E = AO.end(); I != E; ++I){ unsigned Cost = calcSpillCost(*I); DEBUG(dbgs() << "\tRegister: " << PrintReg(*I, TRI) << "\n"); DEBUG(dbgs() << "\tCost: " << Cost << "\n"); @@ -598,7 +606,7 @@ RAFast::defineVirtReg(MachineInstr *MI, unsigned OpNum, LRI->LastUse = MI; LRI->LastOpNum = OpNum; LRI->Dirty = true; - UsedInInstr.insert(LRI->PhysReg); + markRegUsedInInstr(LRI->PhysReg); return LRI; } @@ -648,7 +656,7 @@ RAFast::reloadVirtReg(MachineInstr *MI, unsigned OpNum, assert(LRI->PhysReg && "Register not assigned"); LRI->LastUse = MI; LRI->LastOpNum = OpNum; - UsedInInstr.insert(LRI->PhysReg); + markRegUsedInInstr(LRI->PhysReg); return LRI; } @@ -709,8 +717,8 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!MO.isReg() || !MO.isDef()) continue; unsigned Reg = MO.getReg(); if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; + markRegUsedInInstr(Reg); for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - UsedInInstr.insert(*AI); if (ThroughRegs.count(PhysRegState[*AI])) definePhysReg(MI, *AI, regFree); } @@ -766,67 +774,12 @@ void RAFast::handleThroughOperands(MachineInstr *MI, if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; DEBUG(dbgs() << "\tSetting " << PrintReg(Reg, TRI) << " as used in instr\n"); - UsedInInstr.insert(Reg); + markRegUsedInInstr(Reg); } // Also mark PartialDefs as used to avoid reallocation. for (unsigned i = 0, e = PartialDefs.size(); i != e; ++i) - UsedInInstr.insert(PartialDefs[i]); -} - -/// addRetOperand - ensure that a return instruction has an operand for each -/// value live out of the function. -/// -/// Things marked both call and return are tail calls; do not do this for them. -/// The tail callee need not take the same registers as input that it produces -/// as output, and there are dependencies for its input registers elsewhere. -/// -/// FIXME: This should be done as part of instruction selection, and this helper -/// should be deleted. Until then, we use custom logic here to create the proper -/// operand under all circumstances. 
We can't use addRegisterKilled because that -/// doesn't make sense for undefined values. We can't simply avoid calling it -/// for undefined values, because we must ensure that the operand always exists. -void RAFast::addRetOperands(MachineBasicBlock *MBB) { - if (MBB->empty() || !MBB->back().isReturn() || MBB->back().isCall()) - return; - - MachineInstr *MI = &MBB->back(); - - for (MachineRegisterInfo::liveout_iterator - I = MBB->getParent()->getRegInfo().liveout_begin(), - E = MBB->getParent()->getRegInfo().liveout_end(); I != E; ++I) { - unsigned Reg = *I; - assert(TargetRegisterInfo::isPhysicalRegister(Reg) && - "Cannot have a live-out virtual register."); - - bool hasDef = PhysRegState[Reg] == regReserved; - - // Check if this register already has an operand. - bool Found = false; - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg() || !MO.isUse()) - continue; - - unsigned OperReg = MO.getReg(); - if (!TargetRegisterInfo::isPhysicalRegister(OperReg)) - continue; - - if (OperReg == Reg || TRI->isSuperRegister(OperReg, Reg)) { - // If the ret already has an operand for this physreg or a superset, - // don't duplicate it. Set the kill flag if the value is defined. - if (hasDef && !MO.isKill()) - MO.setIsKill(); - Found = true; - break; - } - } - if (!Found) - MI->addOperand(MachineOperand::CreateReg(Reg, - false /*IsDef*/, - true /*IsImp*/, - hasDef/*IsKill*/)); - } + markRegUsedInInstr(PartialDefs[i]); } void RAFast::AllocateBasicBlock() { @@ -1025,7 +978,7 @@ void RAFast::AllocateBasicBlock() { for (UsedInInstrSet::iterator I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setPhysRegUsed(*I); + MRI->setRegUnitUsed(*I); // Track registers defined by instruction - early clobbers and tied uses at // this point. @@ -1038,8 +991,7 @@ void RAFast::AllocateBasicBlock() { if (!Reg || !TargetRegisterInfo::isPhysicalRegister(Reg)) continue; // Look for physreg defs and tied uses. if (!MO.isDef() && !MI->isRegTiedToDefOperand(i)) continue; - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - UsedInInstr.insert(*AI); + markRegUsedInInstr(Reg); } } @@ -1091,7 +1043,7 @@ void RAFast::AllocateBasicBlock() { for (UsedInInstrSet::iterator I = UsedInInstr.begin(), E = UsedInInstr.end(); I != E; ++I) - MRI->setPhysRegUsed(*I); + MRI->setRegUnitUsed(*I); if (CopyDst && CopyDst == CopySrc && CopyDstSub == CopySrcSub) { DEBUG(dbgs() << "-- coalescing: " << *MI); @@ -1111,9 +1063,6 @@ void RAFast::AllocateBasicBlock() { MBB->erase(Coalesced[i]); NumCopies += Coalesced.size(); - // addRetOperands must run after we've seen all defs in this block. 
- addRetOperands(MBB); - DEBUG(MBB->dump()); } @@ -1130,7 +1079,7 @@ bool RAFast::runOnMachineFunction(MachineFunction &Fn) { MRI->freezeReservedRegs(Fn); RegClassInfo.runOnMachineFunction(Fn); UsedInInstr.clear(); - UsedInInstr.setUniverse(TRI->getNumRegs()); + UsedInInstr.setUniverse(TRI->getNumRegUnits()); assert(!MRI->isSSA() && "regalloc requires leaving SSA"); diff --git a/lib/CodeGen/RegAllocGreedy.cpp b/lib/CodeGen/RegAllocGreedy.cpp index 06f69c1e0d16..6d84176af261 100644 --- a/lib/CodeGen/RegAllocGreedy.cpp +++ b/lib/CodeGen/RegAllocGreedy.cpp @@ -13,36 +13,34 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" +#include "llvm/CodeGen/Passes.h" #include "AllocationOrder.h" #include "InterferenceCache.h" #include "LiveDebugVariables.h" -#include "LiveRegMatrix.h" #include "RegAllocBase.h" -#include "Spiller.h" #include "SpillPlacement.h" +#include "Spiller.h" #include "SplitKit.h" -#include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/PassAnalysisSupport.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" +#include "llvm/CodeGen/LiveRegMatrix.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegAllocRegistry.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/PassAnalysisSupport.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Timer.h" - +#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -414,7 +412,7 @@ void RAGreedy::enqueue(LiveInterval *LI) { Prio = (1u << 31) + Size; // Boost ranges that have a physical register hint. - if (TargetRegisterInfo::isPhysicalRegister(VRM->getRegAllocPref(Reg))) + if (VRM->hasKnownPreference(Reg)) Prio |= (1u << 30); } @@ -443,7 +441,7 @@ unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, while ((PhysReg = Order.next())) if (!Matrix->checkInterference(VirtReg, PhysReg)) break; - if (!PhysReg || Order.isHint(PhysReg)) + if (!PhysReg || Order.isHint()) return PhysReg; // PhysReg is available, but there may be a better choice. @@ -633,16 +631,33 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, // Keep track of the cheapest interference seen so far. EvictionCost BestCost(~0u); unsigned BestPhys = 0; + unsigned OrderLimit = Order.getOrder().size(); // When we are just looking for a reduced cost per use, don't break any // hints, and only evict smaller spill weights. if (CostPerUseLimit < ~0u) { BestCost.BrokenHints = 0; BestCost.MaxWeight = VirtReg.weight; + + // Check of any registers in RC are below CostPerUseLimit. + const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); + unsigned MinCost = RegClassInfo.getMinCost(RC); + if (MinCost >= CostPerUseLimit) { + DEBUG(dbgs() << RC->getName() << " minimum cost = " << MinCost + << ", no cheaper registers to be found.\n"); + return 0; + } + + // It is normal for register classes to have a long tail of registers with + // the same cost. We don't need to look at them if they're too expensive. 
+ if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { + OrderLimit = RegClassInfo.getLastCostChange(RC); + DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); + } } Order.rewind(); - while (unsigned PhysReg = Order.next()) { + while (unsigned PhysReg = Order.nextWithDups(OrderLimit)) { if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) continue; // The first use of a callee-saved register in a function has cost 1. @@ -662,7 +677,7 @@ unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, BestPhys = PhysReg; // Stop if the hint can be used. - if (Order.isHint(PhysReg)) + if (Order.isHint()) break; } diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index 02ebce7a11a0..607edac24bd2 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -31,24 +31,24 @@ #define DEBUG_TYPE "regalloc" -#include "Spiller.h" -#include "VirtRegMap.h" +#include "llvm/CodeGen/RegAllocPBQP.h" #include "RegisterCoalescer.h" -#include "llvm/Module.h" +#include "Spiller.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/CalcSpillWeights.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" -#include "llvm/CodeGen/RegAllocPBQP.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/PBQP/HeuristicSolver.h" #include "llvm/CodeGen/PBQP/Graph.h" +#include "llvm/CodeGen/PBQP/HeuristicSolver.h" #include "llvm/CodeGen/PBQP/Heuristics/Briggs.h" #include "llvm/CodeGen/RegAllocRegistry.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" @@ -526,7 +526,7 @@ void RegAllocPBQP::finalizeAlloc() const { itr != end; ++itr) { LiveInterval *li = &lis->getInterval(*itr); - unsigned physReg = vrm->getRegAllocPref(li->reg); + unsigned physReg = mri->getSimpleHint(li->reg); if (physReg == 0) { const TargetRegisterClass *liRC = mri->getRegClass(li->reg); diff --git a/lib/CodeGen/RegisterClassInfo.cpp b/lib/CodeGen/RegisterClassInfo.cpp index 805d23567307..87382d8f7c42 100644 --- a/lib/CodeGen/RegisterClassInfo.cpp +++ b/lib/CodeGen/RegisterClassInfo.cpp @@ -18,10 +18,10 @@ #include "llvm/CodeGen/RegisterClassInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -44,7 +44,7 @@ void RegisterClassInfo::runOnMachineFunction(const MachineFunction &mf) { } // Does this MF have different CSRs? - const uint16_t *CSR = TRI->getCalleeSavedRegs(MF); + const MCPhysReg *CSR = TRI->getCalleeSavedRegs(MF); if (Update || CSR != CalleeSaved) { // Build a CSRNum map. Every CSR alias gets an entry pointing to the last // overlapping CSR. 
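The RegisterClassInfo and RAGreedy::tryEvict changes above cooperate: the allocation-order computation now records the cheapest cost-per-use in the class and the index at which the cost last changes, so the evicting allocator can skip a class whose cheapest register already exceeds CostPerUseLimit, or stop scanning before the expensive tail. The following is a minimal standalone sketch of that bookkeeping using only the standard library; OrderInfo, buildOrder and numRegsWorthTrying are invented names for illustration, not LLVM APIs.

#include <algorithm>
#include <vector>

// Hypothetical per-class summary: while the allocation order is built,
// remember the cheapest cost-per-use seen and the index at which the cost
// last changed (the start of the last run of equal costs).
struct OrderInfo {
  std::vector<unsigned> Order; // allocation order
  unsigned MinCost = ~0u;      // cheapest cost-per-use in the order
  unsigned LastCostChange = 0; // index where the cost last changed
};

static OrderInfo buildOrder(const std::vector<unsigned> &RawOrder,
                            const std::vector<unsigned> &CostPerUse) {
  OrderInfo Info;
  unsigned LastCost = ~0u;
  for (unsigned Reg : RawOrder) {
    unsigned Cost = CostPerUse[Reg];
    Info.MinCost = std::min(Info.MinCost, Cost);
    if (Cost != LastCost)
      Info.LastCostChange = Info.Order.size();
    Info.Order.push_back(Reg);
    LastCost = Cost;
  }
  return Info;
}

// An evicting allocator with a cost-per-use limit can bail out early or clip
// its scan to the cheap prefix of the order.
static unsigned numRegsWorthTrying(const OrderInfo &Info, unsigned CostLimit,
                                   const std::vector<unsigned> &CostPerUse) {
  if (Info.MinCost >= CostLimit)
    return 0; // no register in this class is cheap enough
  if (!Info.Order.empty() && CostPerUse[Info.Order.back()] >= CostLimit)
    return Info.LastCostChange; // skip the expensive tail
  return Info.Order.size();
}

int main() {
  // r0 and r1 are free to use; r2 has a higher cost per use.
  std::vector<unsigned> Cost = {0, 0, 1};
  OrderInfo Info = buildOrder({0, 1, 2}, Cost);
  return numRegsWorthTrying(Info, /*CostLimit=*/1, Cost) == 2 ? 0 : 1;
}

Because registers of equal cost typically form long contiguous runs in the order, a single "last cost change" index is enough to clip the scan; no per-register comparison is needed beyond that point.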
@@ -79,30 +79,47 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { unsigned NumRegs = RC->getNumRegs(); if (!RCI.Order) - RCI.Order.reset(new unsigned[NumRegs]); + RCI.Order.reset(new MCPhysReg[NumRegs]); unsigned N = 0; - SmallVector CSRAlias; + SmallVector CSRAlias; + unsigned MinCost = 0xff; + unsigned LastCost = ~0u; + unsigned LastCostChange = 0; // FIXME: Once targets reserve registers instead of removing them from the // allocation order, we can simply use begin/end here. - ArrayRef RawOrder = RC->getRawAllocationOrder(*MF); + ArrayRef RawOrder = RC->getRawAllocationOrder(*MF); for (unsigned i = 0; i != RawOrder.size(); ++i) { unsigned PhysReg = RawOrder[i]; // Remove reserved registers from the allocation order. if (Reserved.test(PhysReg)) continue; + unsigned Cost = TRI->getCostPerUse(PhysReg); + MinCost = std::min(MinCost, Cost); + if (CSRNum[PhysReg]) // PhysReg aliases a CSR, save it for later. CSRAlias.push_back(PhysReg); - else + else { + if (Cost != LastCost) + LastCostChange = N; RCI.Order[N++] = PhysReg; + LastCost = Cost; + } } RCI.NumRegs = N + CSRAlias.size(); assert (RCI.NumRegs <= NumRegs && "Allocation order larger than regclass"); // CSR aliases go after the volatile registers, preserve the target's order. - std::copy(CSRAlias.begin(), CSRAlias.end(), &RCI.Order[N]); + for (unsigned i = 0, e = CSRAlias.size(); i != e; ++i) { + unsigned PhysReg = CSRAlias[i]; + unsigned Cost = TRI->getCostPerUse(PhysReg); + if (Cost != LastCost) + LastCostChange = N; + RCI.Order[N++] = PhysReg; + LastCost = Cost; + } // Register allocator stress test. Clip register class to N registers. if (StressRA && RCI.NumRegs > StressRA) @@ -113,6 +130,9 @@ void RegisterClassInfo::compute(const TargetRegisterClass *RC) const { if (Super != RC && getNumAllocatableRegs(Super) > RCI.NumRegs) RCI.ProperSubClass = true; + RCI.MinCost = uint8_t(MinCost); + RCI.LastCostChange = LastCostChange; + DEBUG({ dbgs() << "AllocationOrder(" << RC->getName() << ") = ["; for (unsigned I = 0; I != RCI.NumRegs; ++I) diff --git a/lib/CodeGen/RegisterCoalescer.cpp b/lib/CodeGen/RegisterCoalescer.cpp index 2538f10ede59..d85646dd3c58 100644 --- a/lib/CodeGen/RegisterCoalescer.cpp +++ b/lib/CodeGen/RegisterCoalescer.cpp @@ -15,36 +15,30 @@ #define DEBUG_TYPE "regalloc" #include "RegisterCoalescer.h" -#include "LiveDebugVariables.h" -#include "VirtRegMap.h" - -#include "llvm/Pass.h" -#include "llvm/Value.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/RegisterClassInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/IR/Value.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Target/TargetRegisterInfo.h" +#include 
"llvm/Target/TargetSubtargetInfo.h" #include #include using namespace llvm; @@ -63,6 +57,17 @@ EnableJoining("join-liveintervals", cl::desc("Coalesce copies (default=true)"), cl::init(true)); +// Temporary flag to test critical edge unsplitting. +static cl::opt +EnableJoinSplits("join-splitedges", + cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden); + +// Temporary flag to test global copy optimization. +static cl::opt +EnableGlobalCopies("join-globalcopies", + cl::desc("Coalesce copies that span blocks (default=subtarget)"), + cl::init(cl::BOU_UNSET), cl::Hidden); + static cl::opt VerifyCoalescing("verify-coalescing", cl::desc("Verify machine instrs before and after register coalescing"), @@ -77,13 +82,21 @@ namespace { const TargetRegisterInfo* TRI; const TargetInstrInfo* TII; LiveIntervals *LIS; - LiveDebugVariables *LDV; const MachineLoopInfo* Loops; AliasAnalysis *AA; RegisterClassInfo RegClassInfo; + /// \brief True if the coalescer should aggressively coalesce global copies + /// in favor of keeping local copies. + bool JoinGlobalCopies; + + /// \brief True if the coalescer should aggressively coalesce fall-thru + /// blocks exclusively containing copies. + bool JoinSplitEdges; + /// WorkList - Copy instructions yet to be coalesced. SmallVector WorkList; + SmallVector LocalWorkList; /// ErasedInstrs - Set of instruction pointers that have been erased, and /// that may be present in WorkList. @@ -101,6 +114,9 @@ namespace { /// LiveRangeEdit callback. void LRE_WillEraseInstruction(MachineInstr *MI); + /// coalesceLocals - coalesce the LocalWorkList. + void coalesceLocals(); + /// joinAllIntervals - join compatible live intervals void joinAllIntervals(); @@ -108,9 +124,9 @@ namespace { /// copies that cannot yet be coalesced into WorkList. void copyCoalesceInMBB(MachineBasicBlock *MBB); - /// copyCoalesceWorkList - Try to coalesce all copies in WorkList after - /// position From. Return true if any progress was made. - bool copyCoalesceWorkList(unsigned From = 0); + /// copyCoalesceWorkList - Try to coalesce all copies in CurrList. Return + /// true if any progress was made. + bool copyCoalesceWorkList(MutableArrayRef CurrList); /// joinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, /// which are the src/dst of the copy instruction CopyMI. This returns @@ -150,11 +166,10 @@ namespace { /// reMaterializeTrivialDef - If the source of a copy is defined by a /// trivial computation, replace the copy by rematerialize the definition. - bool reMaterializeTrivialDef(LiveInterval &SrcInt, unsigned DstReg, - MachineInstr *CopyMI); + bool reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI); /// canJoinPhys - Return true if a physreg copy should be joined. - bool canJoinPhys(CoalescerPair &CP); + bool canJoinPhys(const CoalescerPair &CP); /// updateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and /// update the subregister number if it is not zero. 
If DstReg is a @@ -189,7 +204,6 @@ char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", "Simple Register Coalescing", false, false) INITIALIZE_PASS_DEPENDENCY(LiveIntervals) -INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) INITIALIZE_PASS_DEPENDENCY(SlotIndexes) INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) INITIALIZE_AG_DEPENDENCY(AliasAnalysis) @@ -217,6 +231,23 @@ static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, return true; } +// Return true if this block should be vacated by the coalescer to eliminate +// branches. The important cases to handle in the coalescer are critical edges +// split during phi elimination which contain only copies. Simple blocks that +// contain non-branches should also be vacated, but this can be handled by an +// earlier pass similar to early if-conversion. +static bool isSplitEdge(const MachineBasicBlock *MBB) { + if (MBB->pred_size() != 1 || MBB->succ_size() != 1) + return false; + + for (MachineBasicBlock::const_iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) { + if (!MII->isCopyLike() && !MII->isUnconditionalBranch()) + return false; + } + return true; +} + bool CoalescerPair::setRegisters(const MachineInstr *MI) { SrcReg = DstReg = 0; SrcIdx = DstIdx = 0; @@ -358,8 +389,6 @@ void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { AU.addRequired(); AU.addRequired(); AU.addPreserved(); - AU.addRequired(); - AU.addPreserved(); AU.addPreserved(); AU.addRequired(); AU.addPreserved(); @@ -701,9 +730,14 @@ bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP, /// reMaterializeTrivialDef - If the source of a copy is defined by a trivial /// computation, replace the copy by rematerialize the definition. -bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, - unsigned DstReg, +bool RegisterCoalescer::reMaterializeTrivialDef(CoalescerPair &CP, MachineInstr *CopyMI) { + unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg(); + unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg)) + return false; + + LiveInterval &SrcInt = LIS->getInterval(SrcReg); SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); assert(SrcLR != SrcInt.end() && "Live range not found!"); @@ -724,13 +758,17 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, const MCInstrDesc &MCID = DefMI->getDesc(); if (MCID.getNumDefs() != 1) return false; + // Only support subregister destinations when the def is read-undef. + MachineOperand &DstOperand = CopyMI->getOperand(0); + if (DstOperand.getSubReg() && !DstOperand.isUndef()) + return false; if (!DefMI->isImplicitDef()) { // Make sure the copy destination register class fits the instruction // definition register class. The mismatch can happen as a result of earlier // extract_subreg, insert_subreg, subreg_to_reg coalescing. 
const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI, *MF); if (TargetRegisterInfo::isVirtualRegister(DstReg)) { - if (MRI->getRegClass(DstReg) != RC) + if (!MRI->constrainRegClass(DstReg, RC)) return false; } else if (!RC->contains(DstReg)) return false; @@ -742,6 +780,12 @@ bool RegisterCoalescer::reMaterializeTrivialDef(LiveInterval &SrcInt, TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); MachineInstr *NewMI = prior(MII); + // The original DefMI may have been a subregister def, but the full register + // class of its destination matches the destination of CopyMI, and CopyMI is + // either a full register def or is read-undef. Therefore we can clear the + // subregister index on the rematerialized instruction. + NewMI->getOperand(0).setSubReg(0); + // NewMI may have dead implicit defs (E.g. EFLAGS for MOVr0 on X86). // We need to remember these so we can add intervals once we insert // NewMI into SlotIndexes. @@ -847,9 +891,6 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); LiveInterval *DstInt = DstIsPhys ? 0 : &LIS->getInterval(DstReg); - // Update LiveDebugVariables. - LDV->renameRegister(SrcReg, DstReg, SubIdx); - SmallPtrSet Visited; for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); MachineInstr *UseMI = I.skipInstruction();) { @@ -896,7 +937,7 @@ void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg, } /// canJoinPhys - Return true if a copy involving a physreg should be joined. -bool RegisterCoalescer::canJoinPhys(CoalescerPair &CP) { +bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) { /// Always join simple intervals that are defined by a single copy from a /// reserved register. This doesn't increase register pressure, so it is /// always beneficial. @@ -974,9 +1015,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { if (!canJoinPhys(CP)) { // Before giving up coalescing, if definition of source is defined by // trivial computation, try rematerializing it. - if (!CP.isFlipped() && - reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), - CP.getDstReg(), CopyMI)) + if (reMaterializeTrivialDef(CP, CopyMI)) return true; return false; } @@ -1009,9 +1048,7 @@ bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) { // If definition of source is defined by trivial computation, try // rematerializing it. - if (!CP.isFlipped() && - reMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), - CP.getDstReg(), CopyMI)) + if (reMaterializeTrivialDef(CP, CopyMI)) return true; // If we can eliminate the copy without merging the live ranges, do so now. @@ -1246,8 +1283,18 @@ class JoinVals { // Value in the other live range that overlaps this def, if any. VNInfo *OtherVNI; - // Is this value an IMPLICIT_DEF? - bool IsImplicitDef; + // Is this value an IMPLICIT_DEF that can be erased? + // + // IMPLICIT_DEF values should only exist at the end of a basic block that + // is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be + // safely erased if they are overlapping a live value in the other live + // interval. + // + // Weird control flow graphs and incomplete PHI handling in + // ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with + // longer live ranges. Such IMPLICIT_DEF values should be treated like + // normal values. + bool ErasableImplicitDef; // True when the live range of this value will be pruned because of an // overlapping CR_Replace value in the other live range. 
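The isSplitEdge() predicate introduced above captures the shape the coalescer prioritizes when join-splitedges is enabled: a block with a single predecessor and a single successor whose body consists only of copy-like instructions and an unconditional branch, i.e. a block produced by critical-edge splitting that can be emptied once its copies are coalesced. Below is a minimal standalone sketch of the same check over a toy block representation; Block, Kind and looksLikeSplitEdge are invented for illustration, not LLVM types.

#include <vector>

// Toy block representation: an instruction is a copy, an unconditional
// branch, or something else.
enum class Kind { Copy, UncondBranch, Other };

struct Block {
  std::vector<Kind> Insts;
  unsigned NumPreds = 0;
  unsigned NumSuccs = 0;
};

// A block created by critical-edge splitting has exactly one predecessor and
// one successor; if its body is nothing but copies and an unconditional
// branch, the coalescer can hope to empty it out entirely.
static bool looksLikeSplitEdge(const Block &B) {
  if (B.NumPreds != 1 || B.NumSuccs != 1)
    return false;
  for (Kind K : B.Insts)
    if (K != Kind::Copy && K != Kind::UncondBranch)
      return false;
  return true;
}

int main() {
  Block B;
  B.NumPreds = B.NumSuccs = 1;
  B.Insts = {Kind::Copy, Kind::Copy, Kind::UncondBranch};
  return looksLikeSplitEdge(B) ? 0 : 1;
}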
@@ -1257,8 +1304,8 @@ class JoinVals { bool PrunedComputed; Val() : Resolution(CR_Keep), WriteLanes(0), ValidLanes(0), - RedefVNI(0), OtherVNI(0), IsImplicitDef(false), Pruned(false), - PrunedComputed(false) {} + RedefVNI(0), OtherVNI(0), ErasableImplicitDef(false), + Pruned(false), PrunedComputed(false) {} bool isAnalyzed() const { return WriteLanes != 0; } }; @@ -1396,7 +1443,10 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // An IMPLICIT_DEF writes undef values. if (DefMI->isImplicitDef()) { - V.IsImplicitDef = true; + // We normally expect IMPLICIT_DEF values to be live only until the end + // of their block. If the value is really live longer and gets pruned in + // another block, this flag is cleared again. + V.ErasableImplicitDef = true; V.ValidLanes &= ~V.WriteLanes; } } @@ -1449,7 +1499,22 @@ JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) { // We have overlapping values, or possibly a kill of Other. // Recursively compute assignments up the dominator tree. Other.computeAssignment(V.OtherVNI->id, *this); - const Val &OtherV = Other.Vals[V.OtherVNI->id]; + Val &OtherV = Other.Vals[V.OtherVNI->id]; + + // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block. + // This shouldn't normally happen, but ProcessImplicitDefs can leave such + // IMPLICIT_DEF instructions behind, and there is nothing wrong with it + // technically. + // + // WHen it happens, treat that IMPLICIT_DEF as a normal value, and don't try + // to erase the IMPLICIT_DEF instruction. + if (OtherV.ErasableImplicitDef && DefMI && + DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) { + DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def + << " extends into BB#" << DefMI->getParent()->getNumber() + << ", keeping it.\n"); + OtherV.ErasableImplicitDef = false; + } // Allow overlapping PHI values. Any real interference would show up in a // predecessor, the PHI itself can't introduce any conflicts. @@ -1758,7 +1823,8 @@ void JoinVals::pruneValues(JoinVals &Other, // predecessors, so the instruction should simply go away once its value // has been replaced. Val &OtherV = Other.Vals[Vals[i].OtherVNI->id]; - bool EraseImpDef = OtherV.IsImplicitDef && OtherV.Resolution == CR_Keep; + bool EraseImpDef = OtherV.ErasableImplicitDef && + OtherV.Resolution == CR_Keep; if (!Def.isBlock()) { // Remove flags. This def is now a partial redef. // Also remove flags since the joined live range will @@ -1807,7 +1873,7 @@ void JoinVals::eraseInstrs(SmallPtrSet &ErasedInstrs, // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any // longer. The IMPLICIT_DEF instructions are only inserted by // PHIElimination to guarantee that all PHI predecessors have a value. - if (!Vals[i].IsImplicitDef || !Vals[i].Pruned) + if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned) break; // Remove value number i from LI. Note that this VNInfo is still present // in NewVNInfo, so it will appear as an unused value number in the final @@ -1904,47 +1970,77 @@ bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) { } namespace { - // DepthMBBCompare - Comparison predicate that sort first based on the loop - // depth of the basic block (the unsigned), and then on the MBB number. - struct DepthMBBCompare { - typedef std::pair DepthMBBPair; - bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { - // Deeper loops first - if (LHS.first != RHS.first) - return LHS.first > RHS.first; - - // Prefer blocks that are more connected in the CFG. 
This takes care of - // the most difficult copies first while intervals are short. - unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); - unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); - if (cl != cr) - return cl > cr; - - // As a last resort, sort by block number. - return LHS.second->getNumber() < RHS.second->getNumber(); - } - }; +// Information concerning MBB coalescing priority. +struct MBBPriorityInfo { + MachineBasicBlock *MBB; + unsigned Depth; + bool IsSplit; + + MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit) + : MBB(mbb), Depth(depth), IsSplit(issplit) {} +}; +} + +// C-style comparator that sorts first based on the loop depth of the basic +// block (the unsigned), and then on the MBB number. +// +// EnableGlobalCopies assumes that the primary sort key is loop depth. +static int compareMBBPriority(const void *L, const void *R) { + const MBBPriorityInfo *LHS = static_cast(L); + const MBBPriorityInfo *RHS = static_cast(R); + // Deeper loops first + if (LHS->Depth != RHS->Depth) + return LHS->Depth > RHS->Depth ? -1 : 1; + + // Try to unsplit critical edges next. + if (LHS->IsSplit != RHS->IsSplit) + return LHS->IsSplit ? -1 : 1; + + // Prefer blocks that are more connected in the CFG. This takes care of + // the most difficult copies first while intervals are short. + unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size(); + unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size(); + if (cl != cr) + return cl > cr ? -1 : 1; + + // As a last resort, sort by block number. + return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1; +} + +/// \returns true if the given copy uses or defines a local live range. +static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) { + if (!Copy->isCopy()) + return false; + + unsigned SrcReg = Copy->getOperand(1).getReg(); + unsigned DstReg = Copy->getOperand(0).getReg(); + if (TargetRegisterInfo::isPhysicalRegister(SrcReg) + || TargetRegisterInfo::isPhysicalRegister(DstReg)) + return false; + + return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg)) + || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg)); } // Try joining WorkList copies starting from index From. // Null out any successful joins. -bool RegisterCoalescer::copyCoalesceWorkList(unsigned From) { - assert(From <= WorkList.size() && "Out of range"); +bool RegisterCoalescer:: +copyCoalesceWorkList(MutableArrayRef CurrList) { bool Progress = false; - for (unsigned i = From, e = WorkList.size(); i != e; ++i) { - if (!WorkList[i]) + for (unsigned i = 0, e = CurrList.size(); i != e; ++i) { + if (!CurrList[i]) continue; // Skip instruction pointers that have already been erased, for example by // dead code elimination. - if (ErasedInstrs.erase(WorkList[i])) { - WorkList[i] = 0; + if (ErasedInstrs.erase(CurrList[i])) { + CurrList[i] = 0; continue; } bool Again = false; - bool Success = joinCopy(WorkList[i], Again); + bool Success = joinCopy(CurrList[i], Again); Progress |= Success; if (Success || !Again) - WorkList[i] = 0; + CurrList[i] = 0; } return Progress; } @@ -1956,52 +2052,74 @@ RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) { // Collect all copy-like instructions in MBB. Don't start coalescing anything // yet, it might invalidate the iterator. 
const unsigned PrevSize = WorkList.size(); - for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); - MII != E; ++MII) - if (MII->isCopyLike()) - WorkList.push_back(MII); - + if (JoinGlobalCopies) { + // Coalesce copies bottom-up to coalesce local defs before local uses. They + // are not inherently easier to resolve, but slightly preferable until we + // have local live range splitting. In particular this is required by + // cmp+jmp macro fusion. + for (MachineBasicBlock::reverse_iterator + MII = MBB->rbegin(), E = MBB->rend(); MII != E; ++MII) { + if (!MII->isCopyLike()) + continue; + if (isLocalCopy(&(*MII), LIS)) + LocalWorkList.push_back(&(*MII)); + else + WorkList.push_back(&(*MII)); + } + } + else { + for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); + MII != E; ++MII) + if (MII->isCopyLike()) + WorkList.push_back(MII); + } // Try coalescing the collected copies immediately, and remove the nulls. // This prevents the WorkList from getting too large since most copies are // joinable on the first attempt. - if (copyCoalesceWorkList(PrevSize)) + MutableArrayRef + CurrList(WorkList.begin() + PrevSize, WorkList.end()); + if (copyCoalesceWorkList(CurrList)) WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(), (MachineInstr*)0), WorkList.end()); } +void RegisterCoalescer::coalesceLocals() { + copyCoalesceWorkList(LocalWorkList); + for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) { + if (LocalWorkList[j]) + WorkList.push_back(LocalWorkList[j]); + } + LocalWorkList.clear(); +} + void RegisterCoalescer::joinAllIntervals() { DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); - assert(WorkList.empty() && "Old data still around."); - - if (Loops->empty()) { - // If there are no loops in the function, join intervals in function order. - for (MachineFunction::iterator I = MF->begin(), E = MF->end(); - I != E; ++I) - copyCoalesceInMBB(I); - } else { - // Otherwise, join intervals in inner loops before other intervals. - // Unfortunately we can't just iterate over loop hierarchy here because - // there may be more MBB's than BB's. Collect MBB's for sorting. - - // Join intervals in the function prolog first. We want to join physical - // registers with virtual registers before the intervals got too long. - std::vector > MBBs; - for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ - MachineBasicBlock *MBB = I; - MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I)); + assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around."); + + std::vector MBBs; + MBBs.reserve(MF->size()); + for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ + MachineBasicBlock *MBB = I; + MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB), + JoinSplitEdges && isSplitEdge(MBB))); + } + array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority); + + // Coalesce intervals in MBB priority order. + unsigned CurrDepth = UINT_MAX; + for (unsigned i = 0, e = MBBs.size(); i != e; ++i) { + // Try coalescing the collected local copies for deeper loops. + if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) { + coalesceLocals(); + CurrDepth = MBBs[i].Depth; } - - // Sort by loop depth. - std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); - - // Finally, join intervals in loop nest order. 
- for (unsigned i = 0, e = MBBs.size(); i != e; ++i) - copyCoalesceInMBB(MBBs[i].second); + copyCoalesceInMBB(MBBs[i].MBB); } + coalesceLocals(); // Joining intervals can allow other intervals to be joined. Iteratively join // until we make no progress. - while (copyCoalesceWorkList()) + while (copyCoalesceWorkList(WorkList)) /* empty */ ; } @@ -2019,10 +2137,20 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { TRI = TM->getRegisterInfo(); TII = TM->getInstrInfo(); LIS = &getAnalysis(); - LDV = &getAnalysis(); AA = &getAnalysis(); Loops = &getAnalysis(); + const TargetSubtargetInfo &ST = TM->getSubtarget(); + if (EnableGlobalCopies == cl::BOU_UNSET) + JoinGlobalCopies = ST.enableMachineScheduler(); + else + JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE); + + // The MachineScheduler does not currently require JoinSplitEdges. This will + // either be enabled unconditionally or replaced by a more general live range + // splitting optimization. + JoinSplitEdges = EnableJoinSplits; + DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" << "********** Function: " << MF->getName() << '\n'); @@ -2054,7 +2182,6 @@ bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { } DEBUG(dump()); - DEBUG(LDV->dump()); if (VerifyCoalescing) MF->verify(this, "After register coalescing"); return true; diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 543c426458d7..97f22e1049f6 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -12,25 +12,22 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/RegisterPressure.h" #include "llvm/CodeGen/LiveInterval.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/RegisterClassInfo.h" -#include "llvm/CodeGen/RegisterPressure.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; -/// Increase register pressure for each set impacted by this register class. +/// Increase pressure for each pressure set provided by TargetRegisterInfo. static void increaseSetPressure(std::vector &CurrSetPressure, std::vector &MaxSetPressure, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) { - unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; - for (const int *PSet = TRI->getRegClassPressureSets(RC); - *PSet != -1; ++PSet) { + const int *PSet, unsigned Weight) { + for (; *PSet != -1; ++PSet) { CurrSetPressure[*PSet] += Weight; if (&CurrSetPressure != &MaxSetPressure && CurrSetPressure[*PSet] > MaxSetPressure[*PSet]) { @@ -39,32 +36,57 @@ static void increaseSetPressure(std::vector &CurrSetPressure, } } -/// Decrease register pressure for each set impacted by this register class. +/// Decrease pressure for each pressure set provided by TargetRegisterInfo. static void decreaseSetPressure(std::vector &CurrSetPressure, - const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) { - unsigned Weight = TRI->getRegClassWeight(RC).RegWeight; - for (const int *PSet = TRI->getRegClassPressureSets(RC); - *PSet != -1; ++PSet) { + const int *PSet, unsigned Weight) { + for (; *PSet != -1; ++PSet) { assert(CurrSetPressure[*PSet] >= Weight && "register pressure underflow"); CurrSetPressure[*PSet] -= Weight; } } /// Directly increase pressure only within this RegisterPressure result. 
-void RegisterPressure::increase(const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) { - increaseSetPressure(MaxSetPressure, MaxSetPressure, RC, TRI); +void RegisterPressure::increase(unsigned Reg, const TargetRegisterInfo *TRI, + const MachineRegisterInfo *MRI) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + increaseSetPressure(MaxSetPressure, MaxSetPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + increaseSetPressure(MaxSetPressure, MaxSetPressure, + TRI->getRegUnitPressureSets(Reg), + TRI->getRegUnitWeight(Reg)); + } } /// Directly decrease pressure only within this RegisterPressure result. -void RegisterPressure::decrease(const TargetRegisterClass *RC, - const TargetRegisterInfo *TRI) { - decreaseSetPressure(MaxSetPressure, RC, TRI); +void RegisterPressure::decrease(unsigned Reg, const TargetRegisterInfo *TRI, + const MachineRegisterInfo *MRI) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + const TargetRegisterClass *RC = MRI->getRegClass(Reg); + decreaseSetPressure(MaxSetPressure, TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + decreaseSetPressure(MaxSetPressure, TRI->getRegUnitPressureSets(Reg), + TRI->getRegUnitWeight(Reg)); + } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) +static void dumpSetPressure(const std::vector &SetPressure, + const TargetRegisterInfo *TRI) { + for (unsigned i = 0, e = SetPressure.size(); i < e; ++i) { + if (SetPressure[i] != 0) + dbgs() << TRI->getRegPressureSetName(i) << "=" << SetPressure[i] << '\n'; + } +} + void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { + dbgs() << "Max Pressure: "; + dumpSetPressure(MaxSetPressure, TRI); dbgs() << "Live In: "; for (unsigned i = 0, e = LiveInRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveInRegs[i], TRI) << " "; @@ -73,42 +95,47 @@ void RegisterPressure::dump(const TargetRegisterInfo *TRI) const { for (unsigned i = 0, e = LiveOutRegs.size(); i < e; ++i) dbgs() << PrintReg(LiveOutRegs[i], TRI) << " "; dbgs() << '\n'; - for (unsigned i = 0, e = MaxSetPressure.size(); i < e; ++i) { - if (MaxSetPressure[i] != 0) - dbgs() << TRI->getRegPressureSetName(i) << "=" << MaxSetPressure[i] - << '\n'; - } -} -#endif - -/// Increase the current pressure as impacted by these physical registers and -/// bump the high water mark if needed. -void RegPressureTracker::increasePhysRegPressure(ArrayRef Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - TRI->getMinimalPhysRegClass(Regs[I]), TRI); } -/// Simply decrease the current pressure as impacted by these physcial -/// registers. -void RegPressureTracker::decreasePhysRegPressure(ArrayRef Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) - decreaseSetPressure(CurrSetPressure, TRI->getMinimalPhysRegClass(Regs[I]), - TRI); +void RegPressureTracker::dump() const { + dbgs() << "Curr Pressure: "; + dumpSetPressure(CurrSetPressure, TRI); + P.dump(TRI); } +#endif -/// Increase the current pressure as impacted by these virtual registers and -/// bump the high water mark if needed. -void RegPressureTracker::increaseVirtRegPressure(ArrayRef Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) - increaseSetPressure(CurrSetPressure, P.MaxSetPressure, - MRI->getRegClass(Regs[I]), TRI); +/// Increase the current pressure as impacted by these registers and bump +/// the high water mark if needed. 
+void RegPressureTracker::increaseRegPressure(ArrayRef Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) { + if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { + const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); + increaseSetPressure(CurrSetPressure, P.MaxSetPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + increaseSetPressure(CurrSetPressure, P.MaxSetPressure, + TRI->getRegUnitPressureSets(Regs[I]), + TRI->getRegUnitWeight(Regs[I])); + } + } } -/// Simply decrease the current pressure as impacted by these virtual registers. -void RegPressureTracker::decreaseVirtRegPressure(ArrayRef Regs) { - for (unsigned I = 0, E = Regs.size(); I != E; ++I) - decreaseSetPressure(CurrSetPressure, MRI->getRegClass(Regs[I]), TRI); +/// Simply decrease the current pressure as impacted by these registers. +void RegPressureTracker::decreaseRegPressure(ArrayRef Regs) { + for (unsigned I = 0, E = Regs.size(); I != E; ++I) { + if (TargetRegisterInfo::isVirtualRegister(Regs[I])) { + const TargetRegisterClass *RC = MRI->getRegClass(Regs[I]); + decreaseSetPressure(CurrSetPressure, + TRI->getRegClassPressureSets(RC), + TRI->getRegClassWeight(RC).RegWeight); + } + else { + decreaseSetPressure(CurrSetPressure, TRI->getRegUnitPressureSets(Regs[I]), + TRI->getRegUnitWeight(Regs[I])); + } + } } /// Clear the result so it can be used for another round of pressure tracking. @@ -160,6 +187,12 @@ void RegionPressure::openBottom(MachineBasicBlock::const_iterator PrevBottom) { LiveInRegs.clear(); } +const LiveInterval *RegPressureTracker::getInterval(unsigned Reg) const { + if (TargetRegisterInfo::isVirtualRegister(Reg)) + return &LIS->getInterval(Reg); + return LIS->getCachedRegUnit(Reg); +} + /// Setup the RegPressureTracker. /// /// TODO: Add support for pressure without LiveIntervals. @@ -181,9 +214,6 @@ void RegPressureTracker::init(const MachineFunction *mf, } CurrPos = pos; - while (CurrPos != MBB->end() && CurrPos->isDebugValue()) - ++CurrPos; - CurrSetPressure.assign(TRI->getNumRegPressureSets(), 0); if (RequireIntervals) @@ -192,10 +222,10 @@ void RegPressureTracker::init(const MachineFunction *mf, static_cast(P).reset(); P.MaxSetPressure = CurrSetPressure; - LivePhysRegs.clear(); - LivePhysRegs.setUniverse(TRI->getNumRegs()); - LiveVirtRegs.clear(); - LiveVirtRegs.setUniverse(MRI->getNumVirtRegs()); + LiveRegs.PhysRegs.clear(); + LiveRegs.PhysRegs.setUniverse(TRI->getNumRegs()); + LiveRegs.VirtRegs.clear(); + LiveRegs.VirtRegs.setUniverse(MRI->getNumVirtRegs()); } /// Does this pressure result have a valid top position and live ins. @@ -214,19 +244,28 @@ bool RegPressureTracker::isBottomClosed() const { MachineBasicBlock::const_iterator()); } + +SlotIndex RegPressureTracker::getCurrSlot() const { + MachineBasicBlock::const_iterator IdxPos = CurrPos; + while (IdxPos != MBB->end() && IdxPos->isDebugValue()) + ++IdxPos; + if (IdxPos == MBB->end()) + return LIS->getMBBEndIdx(MBB); + return LIS->getInstructionIndex(IdxPos).getRegSlot(); +} + /// Set the boundary for the top of the region and summarize live ins. 
void RegPressureTracker::closeTop() { if (RequireIntervals) - static_cast(P).TopIdx = - LIS->getInstructionIndex(CurrPos).getRegSlot(); + static_cast(P).TopIdx = getCurrSlot(); else static_cast(P).TopPos = CurrPos; assert(P.LiveInRegs.empty() && "inconsistent max pressure result"); - P.LiveInRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size()); - P.LiveInRegs.append(LivePhysRegs.begin(), LivePhysRegs.end()); + P.LiveInRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); + P.LiveInRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); for (SparseSet::const_iterator I = - LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I) + LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) P.LiveInRegs.push_back(*I); std::sort(P.LiveInRegs.begin(), P.LiveInRegs.end()); P.LiveInRegs.erase(std::unique(P.LiveInRegs.begin(), P.LiveInRegs.end()), @@ -236,19 +275,15 @@ void RegPressureTracker::closeTop() { /// Set the boundary for the bottom of the region and summarize live outs. void RegPressureTracker::closeBottom() { if (RequireIntervals) - if (CurrPos == MBB->end()) - static_cast(P).BottomIdx = LIS->getMBBEndIdx(MBB); - else - static_cast(P).BottomIdx = - LIS->getInstructionIndex(CurrPos).getRegSlot(); + static_cast(P).BottomIdx = getCurrSlot(); else static_cast(P).BottomPos = CurrPos; assert(P.LiveOutRegs.empty() && "inconsistent max pressure result"); - P.LiveOutRegs.reserve(LivePhysRegs.size() + LiveVirtRegs.size()); - P.LiveOutRegs.append(LivePhysRegs.begin(), LivePhysRegs.end()); + P.LiveOutRegs.reserve(LiveRegs.PhysRegs.size() + LiveRegs.VirtRegs.size()); + P.LiveOutRegs.append(LiveRegs.PhysRegs.begin(), LiveRegs.PhysRegs.end()); for (SparseSet::const_iterator I = - LiveVirtRegs.begin(), E = LiveVirtRegs.end(); I != E; ++I) + LiveRegs.VirtRegs.begin(), E = LiveRegs.VirtRegs.end(); I != E; ++I) P.LiveOutRegs.push_back(*I); std::sort(P.LiveOutRegs.begin(), P.LiveOutRegs.end()); P.LiveOutRegs.erase(std::unique(P.LiveOutRegs.begin(), P.LiveOutRegs.end()), @@ -258,7 +293,7 @@ void RegPressureTracker::closeBottom() { /// Finalize the region boundaries and record live ins and live outs. void RegPressureTracker::closeRegion() { if (!isTopClosed() && !isBottomClosed()) { - assert(LivePhysRegs.empty() && LiveVirtRegs.empty() && + assert(LiveRegs.PhysRegs.empty() && LiveRegs.VirtRegs.empty() && "no region boundary"); return; } @@ -269,151 +304,97 @@ void RegPressureTracker::closeRegion() { // If both top and bottom are closed, do nothing. } -/// Return true if Reg aliases a register in Regs SparseSet. -static bool hasRegAlias(unsigned Reg, SparseSet &Regs, - const TargetRegisterInfo *TRI) { - assert(!TargetRegisterInfo::isVirtualRegister(Reg) && "only for physregs"); - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - if (Regs.count(*AI)) - return true; - return false; -} - -/// Return true if Reg aliases a register in unsorted Regs SmallVector. -/// This is only valid for physical registers. -static SmallVectorImpl::iterator -findRegAlias(unsigned Reg, SmallVectorImpl &Regs, - const TargetRegisterInfo *TRI) { - for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) { - SmallVectorImpl::iterator I = - std::find(Regs.begin(), Regs.end(), *AI); - if (I != Regs.end()) - return I; - } - return Regs.end(); -} - -/// Return true if Reg can be inserted into Regs SmallVector. For virtual -/// register, do a linear search. For physical registers check for aliases. 
-static SmallVectorImpl::iterator -findReg(unsigned Reg, bool isVReg, SmallVectorImpl &Regs, - const TargetRegisterInfo *TRI) { - if(isVReg) - return std::find(Regs.begin(), Regs.end(), Reg); - return findRegAlias(Reg, Regs, TRI); +/// \brief Convenient wrapper for checking membership in RegisterOperands. +static bool containsReg(ArrayRef Regs, unsigned Reg) { + return std::find(Regs.begin(), Regs.end(), Reg) != Regs.end(); } /// Collect this instruction's unique uses and defs into SmallVectors for /// processing defs and uses in order. -template -struct RegisterOperands { +class RegisterOperands { + const TargetRegisterInfo *TRI; + const MachineRegisterInfo *MRI; + +public: SmallVector Uses; SmallVector Defs; SmallVector DeadDefs; + RegisterOperands(const TargetRegisterInfo *tri, + const MachineRegisterInfo *mri): TRI(tri), MRI(mri) {} + /// Push this operand's register onto the correct vector. - void collect(const MachineOperand &MO, const TargetRegisterInfo *TRI) { - if (MO.readsReg()) { - if (findReg(MO.getReg(), isVReg, Uses, TRI) == Uses.end()) - Uses.push_back(MO.getReg()); - } + void collect(const MachineOperand &MO) { + if (!MO.isReg() || !MO.getReg()) + return; + if (MO.readsReg()) + pushRegUnits(MO.getReg(), Uses); if (MO.isDef()) { - if (MO.isDead()) { - if (findReg(MO.getReg(), isVReg, DeadDefs, TRI) == DeadDefs.end()) - DeadDefs.push_back(MO.getReg()); + if (MO.isDead()) + pushRegUnits(MO.getReg(), DeadDefs); + else + pushRegUnits(MO.getReg(), Defs); + } + } + +protected: + void pushRegUnits(unsigned Reg, SmallVectorImpl &Regs) { + if (TargetRegisterInfo::isVirtualRegister(Reg)) { + if (containsReg(Regs, Reg)) + return; + Regs.push_back(Reg); + } + else if (MRI->isAllocatable(Reg)) { + for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units) { + if (containsReg(Regs, *Units)) + continue; + Regs.push_back(*Units); } - else if (findReg(MO.getReg(), isVReg, Defs, TRI) == Defs.end()) - Defs.push_back(MO.getReg()); } } }; -typedef RegisterOperands PhysRegOperands; -typedef RegisterOperands VirtRegOperands; /// Collect physical and virtual register operands. static void collectOperands(const MachineInstr *MI, - PhysRegOperands &PhysRegOpers, - VirtRegOperands &VirtRegOpers, - const TargetRegisterInfo *TRI, - const MachineRegisterInfo *MRI) { - for(ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) { - const MachineOperand &MO = *OperI; - if (!MO.isReg() || !MO.getReg()) - continue; + RegisterOperands &RegOpers) { + for (ConstMIBundleOperands OperI(MI); OperI.isValid(); ++OperI) + RegOpers.collect(*OperI); - if (TargetRegisterInfo::isVirtualRegister(MO.getReg())) - VirtRegOpers.collect(MO, TRI); - else if (MRI->isAllocatable(MO.getReg())) - PhysRegOpers.collect(MO, TRI); - } // Remove redundant physreg dead defs. - for (unsigned i = PhysRegOpers.DeadDefs.size(); i > 0; --i) { - unsigned Reg = PhysRegOpers.DeadDefs[i-1]; - if (findRegAlias(Reg, PhysRegOpers.Defs, TRI) != PhysRegOpers.Defs.end()) - PhysRegOpers.DeadDefs.erase(&PhysRegOpers.DeadDefs[i-1]); - } + SmallVectorImpl::iterator I = + std::remove_if(RegOpers.DeadDefs.begin(), RegOpers.DeadDefs.end(), + std::bind1st(std::ptr_fun(containsReg), RegOpers.Defs)); + RegOpers.DeadDefs.erase(I, RegOpers.DeadDefs.end()); } /// Force liveness of registers. 
void RegPressureTracker::addLiveRegs(ArrayRef Regs) { for (unsigned i = 0, e = Regs.size(); i != e; ++i) { - if (TargetRegisterInfo::isVirtualRegister(Regs[i])) { - if (LiveVirtRegs.insert(Regs[i]).second) - increaseVirtRegPressure(Regs[i]); - } - else { - if (!hasRegAlias(Regs[i], LivePhysRegs, TRI)) { - LivePhysRegs.insert(Regs[i]); - increasePhysRegPressure(Regs[i]); - } - } + if (LiveRegs.insert(Regs[i])) + increaseRegPressure(Regs[i]); } } -/// Add PhysReg to the live in set and increase max pressure. -void RegPressureTracker::discoverPhysLiveIn(unsigned Reg) { - assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice"); - if (findRegAlias(Reg, P.LiveInRegs, TRI) != P.LiveInRegs.end()) +/// Add Reg to the live in set and increase max pressure. +void RegPressureTracker::discoverLiveIn(unsigned Reg) { + assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); + if (containsReg(P.LiveInRegs, Reg)) return; // At live in discovery, unconditionally increase the high water mark. P.LiveInRegs.push_back(Reg); - P.increase(TRI->getMinimalPhysRegClass(Reg), TRI); + P.increase(Reg, TRI, MRI); } -/// Add PhysReg to the live out set and increase max pressure. -void RegPressureTracker::discoverPhysLiveOut(unsigned Reg) { - assert(!LivePhysRegs.count(Reg) && "avoid bumping max pressure twice"); - if (findRegAlias(Reg, P.LiveOutRegs, TRI) != P.LiveOutRegs.end()) +/// Add Reg to the live out set and increase max pressure. +void RegPressureTracker::discoverLiveOut(unsigned Reg) { + assert(!LiveRegs.contains(Reg) && "avoid bumping max pressure twice"); + if (containsReg(P.LiveOutRegs, Reg)) return; // At live out discovery, unconditionally increase the high water mark. P.LiveOutRegs.push_back(Reg); - P.increase(TRI->getMinimalPhysRegClass(Reg), TRI); -} - -/// Add VirtReg to the live in set and increase max pressure. -void RegPressureTracker::discoverVirtLiveIn(unsigned Reg) { - assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice"); - if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) != - P.LiveInRegs.end()) - return; - - // At live in discovery, unconditionally increase the high water mark. - P.LiveInRegs.push_back(Reg); - P.increase(MRI->getRegClass(Reg), TRI); -} - -/// Add VirtReg to the live out set and increase max pressure. -void RegPressureTracker::discoverVirtLiveOut(unsigned Reg) { - assert(!LiveVirtRegs.count(Reg) && "avoid bumping max pressure twice"); - if (std::find(P.LiveOutRegs.begin(), P.LiveOutRegs.end(), Reg) != - P.LiveOutRegs.end()) - return; - - // At live out discovery, unconditionally increase the high water mark. - P.LiveOutRegs.push_back(Reg); - P.increase(MRI->getRegClass(Reg), TRI); + P.increase(Reg, TRI, MRI); } /// Recede across the previous instruction. @@ -447,52 +428,35 @@ bool RegPressureTracker::recede() { if (RequireIntervals && isTopClosed()) static_cast(P).openTop(SlotIdx); - PhysRegOperands PhysRegOpers; - VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); + RegisterOperands RegOpers(TRI, MRI); + collectOperands(CurrPos, RegOpers); // Boost pressure for all dead defs together. - increasePhysRegPressure(PhysRegOpers.DeadDefs); - increaseVirtRegPressure(VirtRegOpers.DeadDefs); - decreasePhysRegPressure(PhysRegOpers.DeadDefs); - decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); // Kill liveness at live defs. // TODO: consider earlyclobbers? 
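// A minimal sketch of the discoverLiveIn() guard above: a register bumps the
// region's max-pressure high-water mark only the first time it is discovered,
// which is what the containsReg(P.LiveInRegs, Reg) early return enforces. A
// single counter stands in for RegisterPressure::increase(Reg, TRI, MRI).
#include <algorithm>
#include <cassert>
#include <vector>

struct RegionPressure {
  std::vector<unsigned> LiveInRegs;
  unsigned MaxPressure = 0;
};

static void discoverLiveIn(RegionPressure &P, unsigned Reg) {
  if (std::find(P.LiveInRegs.begin(), P.LiveInRegs.end(), Reg) !=
      P.LiveInRegs.end())
    return;                      // already discovered: do not bump twice
  P.LiveInRegs.push_back(Reg);
  ++P.MaxPressure;               // unconditionally raise the high-water mark
}

int main() {
  RegionPressure P;
  discoverLiveIn(P, 42);
  discoverLiveIn(P, 42);         // second discovery is a no-op
  assert(P.LiveInRegs.size() == 1 && P.MaxPressure == 1);
  return 0;
}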
- for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = PhysRegOpers.Defs[i]; - if (LivePhysRegs.erase(Reg)) - decreasePhysRegPressure(Reg); - else - discoverPhysLiveOut(Reg); - } - for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = VirtRegOpers.Defs[i]; - if (LiveVirtRegs.erase(Reg)) - decreaseVirtRegPressure(Reg); + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); else - discoverVirtLiveOut(Reg); + discoverLiveOut(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = PhysRegOpers.Uses[i]; - if (!hasRegAlias(Reg, LivePhysRegs, TRI)) { - increasePhysRegPressure(Reg); - LivePhysRegs.insert(Reg); - } - } - for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = VirtRegOpers.Uses[i]; - if (!LiveVirtRegs.count(Reg)) { + for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = RegOpers.Uses[i]; + if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. if (RequireIntervals) { - const LiveInterval *LI = &LIS->getInterval(Reg); - if (!LI->killedAt(SlotIdx)) - discoverVirtLiveOut(Reg); + const LiveInterval *LI = getInterval(Reg); + if (LI && !LI->killedAt(SlotIdx)) + discoverLiveOut(Reg); } - increaseVirtRegPressure(Reg); - LiveVirtRegs.insert(Reg); + increaseRegPressure(Reg); + LiveRegs.insert(Reg); } } return true; @@ -510,7 +474,7 @@ bool RegPressureTracker::advance() { SlotIndex SlotIdx; if (RequireIntervals) - SlotIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); + SlotIdx = getCurrSlot(); // Open the bottom of the region using slot indexes. if (isBottomClosed()) { @@ -520,57 +484,43 @@ bool RegPressureTracker::advance() { static_cast(P).openBottom(CurrPos); } - PhysRegOperands PhysRegOpers; - VirtRegOperands VirtRegOpers; - collectOperands(CurrPos, PhysRegOpers, VirtRegOpers, TRI, MRI); - - // Kill liveness at last uses. - for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = PhysRegOpers.Uses[i]; - if (!hasRegAlias(Reg, LivePhysRegs, TRI)) - discoverPhysLiveIn(Reg); - else { - // Allocatable physregs are always single-use before regalloc. - decreasePhysRegPressure(Reg); - LivePhysRegs.erase(Reg); - } - } - for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = VirtRegOpers.Uses[i]; + RegisterOperands RegOpers(TRI, MRI); + collectOperands(CurrPos, RegOpers); + + for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = RegOpers.Uses[i]; + // Discover live-ins. + bool isLive = LiveRegs.contains(Reg); + if (!isLive) + discoverLiveIn(Reg); + // Kill liveness at last uses. + bool lastUse = false; if (RequireIntervals) { - const LiveInterval *LI = &LIS->getInterval(Reg); - if (LI->killedAt(SlotIdx)) { - if (LiveVirtRegs.erase(Reg)) - decreaseVirtRegPressure(Reg); - else - discoverVirtLiveIn(Reg); - } + const LiveInterval *LI = getInterval(Reg); + lastUse = LI && LI->killedAt(SlotIdx); } - else if (!LiveVirtRegs.count(Reg)) { - discoverVirtLiveIn(Reg); - increaseVirtRegPressure(Reg); + else { + // Allocatable physregs are always single-use before register rewriting. + lastUse = !TargetRegisterInfo::isVirtualRegister(Reg); } + if (lastUse && isLive) { + LiveRegs.erase(Reg); + decreaseRegPressure(Reg); + } + else if (!lastUse && !isLive) + increaseRegPressure(Reg); } // Generate liveness for defs. 
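// A minimal sketch of the use handling in advance() above: each use is
// classified by whether the register is currently live and whether this is its
// last use. Last-use-and-live closes a live range (pressure drops); neither
// last-use nor live opens one (pressure rises); the remaining two cases leave
// the current pressure unchanged. A std::set stands in for the LiveRegSet.
#include <cassert>
#include <set>

static void handleUse(std::set<unsigned> &LiveRegs, unsigned &CurrPressure,
                      unsigned Reg, bool LastUse) {
  bool IsLive = LiveRegs.count(Reg) != 0;
  if (LastUse && IsLive) {
    LiveRegs.erase(Reg);
    --CurrPressure;
  } else if (!LastUse && !IsLive) {
    LiveRegs.insert(Reg);
    ++CurrPressure;
  }
}

int main() {
  std::set<unsigned> LiveRegs;
  unsigned CurrPressure = 0;
  handleUse(LiveRegs, CurrPressure, 7, /*LastUse=*/false); // opens a live range
  assert(CurrPressure == 1);
  handleUse(LiveRegs, CurrPressure, 7, /*LastUse=*/true);  // closes it again
  assert(CurrPressure == 0 && LiveRegs.empty());
  return 0;
}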
- for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = PhysRegOpers.Defs[i]; - if (!hasRegAlias(Reg, LivePhysRegs, TRI)) { - increasePhysRegPressure(Reg); - LivePhysRegs.insert(Reg); - } - } - for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = VirtRegOpers.Defs[i]; - if (LiveVirtRegs.insert(Reg).second) - increaseVirtRegPressure(Reg); + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (LiveRegs.insert(Reg)) + increaseRegPressure(Reg); } // Boost pressure for all dead defs together. - increasePhysRegPressure(PhysRegOpers.DeadDefs); - increaseVirtRegPressure(VirtRegOpers.DeadDefs); - decreasePhysRegPressure(PhysRegOpers.DeadDefs); - decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); // Find the next instruction. do @@ -661,39 +611,28 @@ static void computeMaxPressureDelta(ArrayRef OldMaxPressureVec, /// This is intended for speculative queries. It leaves pressure inconsistent /// with the current position, so must be restored by the caller. void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { + assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + // Account for register pressure similar to RegPressureTracker::recede(). - PhysRegOperands PhysRegOpers; - VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); + RegisterOperands RegOpers(TRI, MRI); + collectOperands(MI, RegOpers); // Boost max pressure for all dead defs together. // Since CurrSetPressure and MaxSetPressure - increasePhysRegPressure(PhysRegOpers.DeadDefs); - increaseVirtRegPressure(VirtRegOpers.DeadDefs); - decreasePhysRegPressure(PhysRegOpers.DeadDefs); - decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); // Kill liveness at live defs. - for (unsigned i = 0, e = PhysRegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = PhysRegOpers.Defs[i]; - if (!findReg(Reg, false, PhysRegOpers.Uses, TRI)) - decreasePhysRegPressure(PhysRegOpers.Defs); - } - for (unsigned i = 0, e = VirtRegOpers.Defs.size(); i < e; ++i) { - unsigned Reg = VirtRegOpers.Defs[i]; - if (!findReg(Reg, true, VirtRegOpers.Uses, TRI)) - decreaseVirtRegPressure(VirtRegOpers.Defs); + for (unsigned i = 0, e = RegOpers.Defs.size(); i < e; ++i) { + unsigned Reg = RegOpers.Defs[i]; + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); } // Generate liveness for uses. - for (unsigned i = 0, e = PhysRegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = PhysRegOpers.Uses[i]; - if (!hasRegAlias(Reg, LivePhysRegs, TRI)) - increasePhysRegPressure(Reg); - } - for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = VirtRegOpers.Uses[i]; - if (!LiveVirtRegs.count(Reg)) - increaseVirtRegPressure(Reg); + for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = RegOpers.Uses[i]; + if (!LiveRegs.contains(Reg)) + increaseRegPressure(Reg); } } @@ -740,6 +679,8 @@ static bool findUseBetween(unsigned Reg, UI = MRI->use_nodbg_begin(Reg), UE = MRI->use_nodbg_end(); UI != UE; UI.skipInstruction()) { const MachineInstr* MI = &*UI; + if (MI->isDebugValue()) + continue; SlotIndex InstSlot = LIS->getInstructionIndex(MI).getRegSlot(); if (InstSlot >= PriorUseIdx && InstSlot < NextUseIdx) return true; @@ -754,38 +695,42 @@ static bool findUseBetween(unsigned Reg, /// This is intended for speculative queries. 
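// A minimal sketch of the "boost pressure for all dead defs together" pattern
// above: increaseRegPressure() raises both the current and the max counters,
// while decreaseRegPressure() only lowers the current one, so calling the pair
// back to back records the transient spike of a dead def in the max-pressure
// high-water mark without perturbing the current pressure.
#include <algorithm>
#include <cassert>

struct Pressure {
  unsigned Curr = 0;
  unsigned Max = 0;
  void increase() { ++Curr; Max = std::max(Max, Curr); }
  void decrease() { --Curr; }
};

int main() {
  Pressure P;
  P.increase();          // dead def becomes momentarily live
  P.decrease();          // and dies at the same instruction
  assert(P.Curr == 0 && P.Max == 1);
  return 0;
}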
It leaves pressure inconsistent /// with the current position, so must be restored by the caller. void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { + assert(!MI->isDebugValue() && "Expect a nondebug instruction."); + // Account for register pressure similar to RegPressureTracker::recede(). - PhysRegOperands PhysRegOpers; - VirtRegOperands VirtRegOpers; - collectOperands(MI, PhysRegOpers, VirtRegOpers, TRI, MRI); + RegisterOperands RegOpers(TRI, MRI); + collectOperands(MI, RegOpers); // Kill liveness at last uses. Assume allocatable physregs are single-use // rather than checking LiveIntervals. - decreasePhysRegPressure(PhysRegOpers.Uses); - if (RequireIntervals) { - SlotIndex SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); - for (unsigned i = 0, e = VirtRegOpers.Uses.size(); i < e; ++i) { - unsigned Reg = VirtRegOpers.Uses[i]; - const LiveInterval *LI = &LIS->getInterval(Reg); - // FIXME: allow the caller to pass in the list of vreg uses that remain to - // be bottom-scheduled to avoid searching uses at each query. - SlotIndex CurrIdx = LIS->getInstructionIndex(CurrPos).getRegSlot(); - if (LI->killedAt(SlotIdx) + SlotIndex SlotIdx; + if (RequireIntervals) + SlotIdx = LIS->getInstructionIndex(MI).getRegSlot(); + + for (unsigned i = 0, e = RegOpers.Uses.size(); i < e; ++i) { + unsigned Reg = RegOpers.Uses[i]; + if (RequireIntervals) { + // FIXME: allow the caller to pass in the list of vreg uses that remain + // to be bottom-scheduled to avoid searching uses at each query. + SlotIndex CurrIdx = getCurrSlot(); + const LiveInterval *LI = getInterval(Reg); + if (LI && LI->killedAt(SlotIdx) && !findUseBetween(Reg, CurrIdx, SlotIdx, MRI, LIS)) { - decreaseVirtRegPressure(Reg); + decreaseRegPressure(Reg); } } + else if (!TargetRegisterInfo::isVirtualRegister(Reg)) { + // Allocatable physregs are always single-use before register rewriting. + decreaseRegPressure(Reg); + } } // Generate liveness for defs. - increasePhysRegPressure(PhysRegOpers.Defs); - increaseVirtRegPressure(VirtRegOpers.Defs); + increaseRegPressure(RegOpers.Defs); // Boost pressure for all dead defs together. 
- increasePhysRegPressure(PhysRegOpers.DeadDefs); - increaseVirtRegPressure(VirtRegOpers.DeadDefs); - decreasePhysRegPressure(PhysRegOpers.DeadDefs); - decreaseVirtRegPressure(VirtRegOpers.DeadDefs); + increaseRegPressure(RegOpers.DeadDefs); + decreaseRegPressure(RegOpers.DeadDefs); } /// Consider the pressure increase caused by traversing this instruction diff --git a/lib/CodeGen/RegisterScavenging.cpp b/lib/CodeGen/RegisterScavenging.cpp index 5ec6564ce398..07ace7a436c7 100644 --- a/lib/CodeGen/RegisterScavenging.cpp +++ b/lib/CodeGen/RegisterScavenging.cpp @@ -16,21 +16,17 @@ #define DEBUG_TYPE "reg-scavenging" #include "llvm/CodeGen/RegisterScavenging.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; /// setUsed - Set the register and its sub-registers as being used. @@ -43,15 +39,17 @@ void RegScavenger::setUsed(unsigned Reg) { bool RegScavenger::isAliasUsed(unsigned Reg) const { for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI) - if (isUsed(*AI)) + if (isUsed(*AI, *AI == Reg)) return true; return false; } void RegScavenger::initRegState() { - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; + for (SmallVector::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + I->Reg = 0; + I->Restore = NULL; + } // All registers started out unused. RegsAvailable.set(); @@ -112,27 +110,11 @@ void RegScavenger::addRegWithSubRegs(BitVector &BV, unsigned Reg) { BV.set(*SubRegs); } -void RegScavenger::forward() { - // Move ptr forward. - if (!Tracking) { - MBBI = MBB->begin(); - Tracking = true; - } else { - assert(MBBI != MBB->end() && "Already past the end of the basic block!"); - MBBI = llvm::next(MBBI); - } - assert(MBBI != MBB->end() && "Already at the end of the basic block!"); +void RegScavenger::determineKillsAndDefs() { + assert(Tracking && "Must be tracking to determine kills and defs"); MachineInstr *MI = MBBI; - - if (MI == ScavengeRestore) { - ScavengedReg = 0; - ScavengedRC = NULL; - ScavengeRestore = NULL; - } - - if (MI->isDebugValue()) - return; + assert(!MI->isDebugValue() && "Debug values have no kills or defs"); // Find out which registers are early clobbered, killed, defined, and marked // def-dead in this instruction. @@ -149,7 +131,7 @@ void RegScavenger::forward() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || isReserved(Reg)) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { @@ -166,6 +148,54 @@ void RegScavenger::forward() { addRegWithSubRegs(DefRegs, Reg); } } +} + +void RegScavenger::unprocess() { + assert(Tracking && "Cannot unprocess because we're not tracking"); + + MachineInstr *MI = MBBI; + if (MI->isDebugValue()) + return; + + determineKillsAndDefs(); + + // Commit the changes. 
+ setUsed(KillRegs); + setUnused(DefRegs); + + if (MBBI == MBB->begin()) { + MBBI = MachineBasicBlock::iterator(NULL); + Tracking = false; + } else + --MBBI; +} + +void RegScavenger::forward() { + // Move ptr forward. + if (!Tracking) { + MBBI = MBB->begin(); + Tracking = true; + } else { + assert(MBBI != MBB->end() && "Already past the end of the basic block!"); + MBBI = llvm::next(MBBI); + } + assert(MBBI != MBB->end() && "Already at the end of the basic block!"); + + MachineInstr *MI = MBBI; + + for (SmallVector::iterator I = Scavenged.begin(), + IE = Scavenged.end(); I != IE; ++I) { + if (I->Restore != MI) + continue; + + I->Reg = 0; + I->Restore = NULL; + } + + if (MI->isDebugValue()) + return; + + determineKillsAndDefs(); // Verify uses and defs. #ifndef NDEBUG @@ -174,7 +204,7 @@ void RegScavenger::forward() { if (!MO.isReg()) continue; unsigned Reg = MO.getReg(); - if (!Reg || isReserved(Reg)) + if (!Reg || TargetRegisterInfo::isVirtualRegister(Reg) || isReserved(Reg)) continue; if (MO.isUse()) { if (MO.isUndef()) @@ -320,6 +350,16 @@ unsigned RegScavenger::findSurvivorReg(MachineBasicBlock::iterator StartMI, return Survivor; } +static unsigned getFrameIndexOperandNum(MachineInstr *MI) { + unsigned i = 0; + while (!MI->getOperand(i).isFI()) { + ++i; + assert(i < MI->getNumOperands() && + "Instr doesn't have FrameIndex operand!"); + } + return i; +} + unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj) { @@ -354,33 +394,47 @@ unsigned RegScavenger::scavengeRegister(const TargetRegisterClass *RC, return SReg; } - assert(ScavengedReg == 0 && - "Scavenger slot is live, unable to scavenge another register!"); + // Find an available scavenging slot. + unsigned SI; + for (SI = 0; SI < Scavenged.size(); ++SI) + if (Scavenged[SI].Reg == 0) + break; + + if (SI == Scavenged.size()) { + // We need to scavenge a register but have no spill slot, the target + // must know how to do it (if not, we'll assert below). + Scavenged.push_back(ScavengedInfo()); + } // Avoid infinite regress - ScavengedReg = SReg; + Scavenged[SI].Reg = SReg; // If the target knows how to save/restore the register, let it do so; // otherwise, use the emergency stack spill slot. if (!TRI->saveScavengerRegister(*MBB, I, UseMI, RC, SReg)) { // Spill the scavenged register before I. - assert(ScavengingFrameIndex >= 0 && + assert(Scavenged[SI].FrameIndex >= 0 && "Cannot scavenge register without an emergency spill slot!"); - TII->storeRegToStackSlot(*MBB, I, SReg, true, ScavengingFrameIndex, RC,TRI); + TII->storeRegToStackSlot(*MBB, I, SReg, true, Scavenged[SI].FrameIndex, + RC, TRI); MachineBasicBlock::iterator II = prior(I); - TRI->eliminateFrameIndex(II, SPAdj, this); + + unsigned FIOperandNum = getFrameIndexOperandNum(II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); // Restore the scavenged register before its use (or first terminator). - TII->loadRegFromStackSlot(*MBB, UseMI, SReg, ScavengingFrameIndex, RC, TRI); + TII->loadRegFromStackSlot(*MBB, UseMI, SReg, Scavenged[SI].FrameIndex, + RC, TRI); II = prior(UseMI); - TRI->eliminateFrameIndex(II, SPAdj, this); + + FIOperandNum = getFrameIndexOperandNum(II); + TRI->eliminateFrameIndex(II, SPAdj, FIOperandNum, this); } - ScavengeRestore = prior(UseMI); + Scavenged[SI].Restore = prior(UseMI); // Doing this here leads to infinite regress. 
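// A minimal sketch of the scavenging-slot selection introduced above: instead
// of a single ScavengedReg/ScavengedRC pair, the scavenger keeps a vector of
// ScavengedInfo entries, reuses the first one whose Reg is 0, and grows the
// vector when every slot is in use. The names mirror the patch, but the struct
// is trimmed to just what the search needs.
#include <cassert>
#include <vector>

struct ScavengedInfo {
  unsigned Reg = 0;        // 0 means the slot is free
  int FrameIndex = -1;
};

static unsigned selectScavengingSlot(std::vector<ScavengedInfo> &Scavenged) {
  unsigned SI;
  for (SI = 0; SI < Scavenged.size(); ++SI)
    if (Scavenged[SI].Reg == 0)
      break;
  if (SI == Scavenged.size())
    Scavenged.push_back(ScavengedInfo());   // no free slot: add one
  return SI;
}

int main() {
  std::vector<ScavengedInfo> Scavenged(1);
  Scavenged[0].Reg = 17;                      // slot 0 is busy
  unsigned SI = selectScavengingSlot(Scavenged);
  assert(SI == 1 && Scavenged.size() == 2);   // a fresh slot was appended
  return 0;
}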
- // ScavengedReg = SReg; - ScavengedRC = RC; + // Scavenged[SI].Reg = SReg; DEBUG(dbgs() << "Scavenged register (with spill): " << TRI->getName(SReg) << "\n"); diff --git a/lib/CodeGen/ScheduleDAG.cpp b/lib/CodeGen/ScheduleDAG.cpp index 9a6507100170..07e5b470fb1e 100644 --- a/lib/CodeGen/ScheduleDAG.cpp +++ b/lib/CodeGen/ScheduleDAG.cpp @@ -16,12 +16,12 @@ #include "llvm/CodeGen/ScheduleDAG.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -62,10 +62,14 @@ const MCInstrDesc *ScheduleDAG::getNodeDesc(const SDNode *Node) const { /// addPred - This adds the specified edge as a pred of the current node if /// not already. It also adds the current node as a successor of the /// specified node. -bool SUnit::addPred(const SDep &D) { +bool SUnit::addPred(const SDep &D, bool Required) { // If this node already has this depenence, don't add a redundant one. for (SmallVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) { + // Zero-latency weak edges may be added purely for heuristic ordering. Don't + // add them if another kind of edge already exists. + if (!Required && I->getSUnit() == D.getSUnit()) + return false; if (I->overlaps(D)) { // Extend the latency if needed. Equivalent to removePred(I) + addPred(D). if (I->getLatency() < D.getLatency()) { @@ -97,12 +101,22 @@ bool SUnit::addPred(const SDep &D) { ++N->NumSuccs; } if (!N->isScheduled) { - assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); - ++NumPredsLeft; + if (D.isWeak()) { + ++WeakPredsLeft; + } + else { + assert(NumPredsLeft < UINT_MAX && "NumPredsLeft will overflow!"); + ++NumPredsLeft; + } } if (!isScheduled) { - assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); - ++N->NumSuccsLeft; + if (D.isWeak()) { + ++N->WeakSuccsLeft; + } + else { + assert(N->NumSuccsLeft < UINT_MAX && "NumSuccsLeft will overflow!"); + ++N->NumSuccsLeft; + } } Preds.push_back(D); N->Succs.push_back(P); @@ -121,20 +135,14 @@ void SUnit::removePred(const SDep &D) { for (SmallVector::iterator I = Preds.begin(), E = Preds.end(); I != E; ++I) if (*I == D) { - bool FoundSucc = false; // Find the corresponding successor in N. SDep P = D; P.setSUnit(this); SUnit *N = D.getSUnit(); - for (SmallVector::iterator II = N->Succs.begin(), - EE = N->Succs.end(); II != EE; ++II) - if (*II == P) { - FoundSucc = true; - N->Succs.erase(II); - break; - } - assert(FoundSucc && "Mismatching preds / succs lists!"); - (void)FoundSucc; + SmallVectorImpl::iterator Succ = std::find(N->Succs.begin(), + N->Succs.end(), P); + assert(Succ != N->Succs.end() && "Mismatching preds / succs lists!"); + N->Succs.erase(Succ); Preds.erase(I); // Update the bookkeeping. 
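// A minimal sketch of the weak-edge bookkeeping added to SUnit::addPred()
// above: weak dependences (zero-latency heuristic ordering edges) are tallied
// in a separate WeakPredsLeft counter, while required edges keep using
// NumPredsLeft, so a pending weak edge does not by itself keep a node from
// becoming ready.
#include <cassert>

struct NodeCounts {
  unsigned NumPredsLeft = 0;
  unsigned WeakPredsLeft = 0;
  void addPred(bool IsWeak) {
    if (IsWeak)
      ++WeakPredsLeft;
    else
      ++NumPredsLeft;
  }
  bool isReady() const { return NumPredsLeft == 0; } // weak preds don't block
};

int main() {
  NodeCounts SU;
  SU.addPred(/*IsWeak=*/true);
  assert(SU.isReady());          // a weak pred alone leaves the node ready
  SU.addPred(/*IsWeak=*/false);
  assert(!SU.isReady());
  return 0;
}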
if (P.getKind() == SDep::Data) { @@ -144,12 +152,20 @@ void SUnit::removePred(const SDep &D) { --N->NumSuccs; } if (!N->isScheduled) { - assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); - --NumPredsLeft; + if (D.isWeak()) + --WeakPredsLeft; + else { + assert(NumPredsLeft > 0 && "NumPredsLeft will underflow!"); + --NumPredsLeft; + } } if (!isScheduled) { - assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); - --N->NumSuccsLeft; + if (D.isWeak()) + --N->WeakSuccsLeft; + else { + assert(N->NumSuccsLeft > 0 && "NumSuccsLeft will underflow!"); + --N->NumSuccsLeft; + } } if (P.getLatency() != 0) { this->setDepthDirty(); @@ -279,6 +295,21 @@ void SUnit::ComputeHeight() { } while (!WorkList.empty()); } +void SUnit::biasCriticalPath() { + if (NumPreds < 2) + return; + + SUnit::pred_iterator BestI = Preds.begin(); + unsigned MaxDepth = BestI->getSUnit()->getDepth(); + for (SUnit::pred_iterator + I = llvm::next(BestI), E = Preds.end(); I != E; ++I) { + if (I->getKind() == SDep::Data && I->getSUnit()->getDepth() > MaxDepth) + BestI = I; + } + if (BestI != Preds.begin()) + std::swap(*Preds.begin(), *BestI); +} + #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) /// SUnit - Scheduling unit. It's an wrapper around either a single SDNode or /// a group of nodes flagged together. @@ -292,10 +323,14 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { dbgs() << " # preds left : " << NumPredsLeft << "\n"; dbgs() << " # succs left : " << NumSuccsLeft << "\n"; + if (WeakPredsLeft) + dbgs() << " # weak preds left : " << WeakPredsLeft << "\n"; + if (WeakSuccsLeft) + dbgs() << " # weak succs left : " << WeakSuccsLeft << "\n"; dbgs() << " # rdefs left : " << NumRegDefsLeft << "\n"; dbgs() << " Latency : " << Latency << "\n"; - dbgs() << " Depth : " << Depth << "\n"; - dbgs() << " Height : " << Height << "\n"; + dbgs() << " Depth : " << getDepth() << "\n"; + dbgs() << " Height : " << getHeight() << "\n"; if (Preds.size() != 0) { dbgs() << " Predecessors:\n"; @@ -332,6 +367,8 @@ void SUnit::dumpAll(const ScheduleDAG *G) const { if (I->isArtificial()) dbgs() << " *"; dbgs() << ": Latency=" << I->getLatency(); + if (I->isAssignedRegDep()) + dbgs() << " Reg=" << PrintReg(I->getReg(), G->TRI); dbgs() << "\n"; } } @@ -429,6 +466,8 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { Node2Index.resize(DAGSize); // Initialize the data structures. + if (ExitSU) + WorkList.push_back(ExitSU); for (unsigned i = 0, e = DAGSize; i != e; ++i) { SUnit *SU = &SUnits[i]; int NodeNum = SU->NodeNum; @@ -448,11 +487,12 @@ void ScheduleDAGTopologicalSort::InitDAGTopologicalSorting() { while (!WorkList.empty()) { SUnit *SU = WorkList.back(); WorkList.pop_back(); - Allocate(SU->NodeNum, --Id); + if (SU->NodeNum < DAGSize) + Allocate(SU->NodeNum, --Id); for (SUnit::const_pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); I != E; ++I) { SUnit *SU = I->getSUnit(); - if (!--Node2Index[SU->NodeNum]) + if (SU->NodeNum < DAGSize && !--Node2Index[SU->NodeNum]) // If all dependencies of the node are processed already, // then the node can be computed now. WorkList.push_back(SU); @@ -513,7 +553,10 @@ void ScheduleDAGTopologicalSort::DFS(const SUnit *SU, int UpperBound, WorkList.pop_back(); Visited.set(SU->NodeNum); for (int I = SU->Succs.size()-1; I >= 0; --I) { - int s = SU->Succs[I].getSUnit()->NodeNum; + unsigned s = SU->Succs[I].getSUnit()->NodeNum; + // Edges to non-SUnits are allowed but ignored (e.g. ExitSU). 
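// A minimal sketch of the heuristic behind SUnit::biasCriticalPath() above:
// among a node's data predecessors, swap the deepest one to the front of the
// Preds list so a depth-first walk follows the likely critical path first.
// Pred is a stand-in for SDep restricted to the two fields the heuristic reads.
#include <algorithm>
#include <cassert>
#include <vector>

struct Pred {
  bool IsData;
  unsigned Depth;
};

static void biasCriticalPath(std::vector<Pred> &Preds) {
  if (Preds.size() < 2)
    return;
  std::vector<Pred>::iterator BestI = Preds.begin();
  unsigned MaxDepth = BestI->Depth;
  for (std::vector<Pred>::iterator I = BestI + 1, E = Preds.end(); I != E; ++I)
    if (I->IsData && I->Depth > MaxDepth) {
      MaxDepth = I->Depth;
      BestI = I;
    }
  if (BestI != Preds.begin())
    std::swap(*Preds.begin(), *BestI);
}

int main() {
  std::vector<Pred> Preds = {{true, 2}, {false, 9}, {true, 5}};
  biasCriticalPath(Preds);
  assert(Preds.front().Depth == 5);   // deepest *data* pred moved to the front
  return 0;
}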
+ if (s >= Node2Index.size()) + continue; if (Node2Index[s] == UpperBound) { HasLoop = true; return; @@ -554,15 +597,16 @@ void ScheduleDAGTopologicalSort::Shift(BitVector& Visited, int LowerBound, } -/// WillCreateCycle - Returns true if adding an edge from SU to TargetSU will -/// create a cycle. -bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *SU, SUnit *TargetSU) { - if (IsReachable(TargetSU, SU)) +/// WillCreateCycle - Returns true if adding an edge to TargetSU from SU will +/// create a cycle. If so, it is not safe to call AddPred(TargetSU, SU). +bool ScheduleDAGTopologicalSort::WillCreateCycle(SUnit *TargetSU, SUnit *SU) { + // Is SU reachable from TargetSU via successor edges? + if (IsReachable(SU, TargetSU)) return true; - for (SUnit::pred_iterator I = SU->Preds.begin(), E = SU->Preds.end(); - I != E; ++I) + for (SUnit::pred_iterator + I = TargetSU->Preds.begin(), E = TargetSU->Preds.end(); I != E; ++I) if (I->isAssignedRegDep() && - IsReachable(TargetSU, I->getSUnit())) + IsReachable(SU, I->getSUnit())) return true; return false; } @@ -592,6 +636,7 @@ void ScheduleDAGTopologicalSort::Allocate(int n, int index) { } ScheduleDAGTopologicalSort:: -ScheduleDAGTopologicalSort(std::vector &sunits) : SUnits(sunits) {} +ScheduleDAGTopologicalSort(std::vector &sunits, SUnit *exitsu) + : SUnits(sunits), ExitSU(exitsu) {} ScheduleHazardRecognizer::~ScheduleHazardRecognizer() {} diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index a4d4a93e6dd5..71e7a21ef2bc 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -12,8 +12,11 @@ // //===----------------------------------------------------------------------===// -#define DEBUG_TYPE "sched-instrs" -#include "llvm/Operator.h" +#define DEBUG_TYPE "misched" +#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/ADT/MapVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" @@ -22,19 +25,17 @@ #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" #include "llvm/CodeGen/RegisterPressure.h" -#include "llvm/CodeGen/ScheduleDAGILP.h" -#include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/CodeGen/ScheduleDFS.h" +#include "llvm/IR/Operator.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; static cl::opt EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, @@ -66,7 +67,7 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { // regular getUnderlyingObjectFromInt. if (U->getOpcode() == Instruction::PtrToInt) return U->getOperand(0); - // If we find an add of a constant or a multiplied value, it's + // If we find an add of a constant, a multiplied value, or a phi, it's // likely that the other operand will lead us to the base // object. 
We don't have to worry about the case where the // object address is somehow being computed by the multiply, @@ -74,7 +75,8 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { // identifiable object. if (U->getOpcode() != Instruction::Add || (!isa(U->getOperand(1)) && - Operator::getOpcode(U->getOperand(1)) != Instruction::Mul)) + Operator::getOpcode(U->getOperand(1)) != Instruction::Mul && + !isa(U->getOperand(1)))) return V; V = U->getOperand(0); } else { @@ -84,56 +86,77 @@ static const Value *getUnderlyingObjectFromInt(const Value *V) { } while (1); } -/// getUnderlyingObject - This is a wrapper around GetUnderlyingObject +/// getUnderlyingObjects - This is a wrapper around GetUnderlyingObjects /// and adds support for basic ptrtoint+arithmetic+inttoptr sequences. -static const Value *getUnderlyingObject(const Value *V) { - // First just call Value::getUnderlyingObject to let it do what it does. +static void getUnderlyingObjects(const Value *V, + SmallVectorImpl &Objects) { + SmallPtrSet Visited; + SmallVector Working(1, V); do { - V = GetUnderlyingObject(V); - // If it found an inttoptr, use special code to continue climing. - if (Operator::getOpcode(V) != Instruction::IntToPtr) - break; - const Value *O = getUnderlyingObjectFromInt(cast(V)->getOperand(0)); - // If that succeeded in finding a pointer, continue the search. - if (!O->getType()->isPointerTy()) - break; - V = O; - } while (1); - return V; + V = Working.pop_back_val(); + + SmallVector Objs; + GetUnderlyingObjects(const_cast(V), Objs); + + for (SmallVector::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { + V = *I; + if (!Visited.insert(V)) + continue; + if (Operator::getOpcode(V) == Instruction::IntToPtr) { + const Value *O = + getUnderlyingObjectFromInt(cast(V)->getOperand(0)); + if (O->getType()->isPointerTy()) { + Working.push_back(O); + continue; + } + } + Objects.push_back(const_cast(V)); + } + } while (!Working.empty()); } -/// getUnderlyingObjectForInstr - If this machine instr has memory reference +/// getUnderlyingObjectsForInstr - If this machine instr has memory reference /// information and it can be tracked to a normal reference to a known -/// object, return the Value for that object. Otherwise return null. -static const Value *getUnderlyingObjectForInstr(const MachineInstr *MI, - const MachineFrameInfo *MFI, - bool &MayAlias) { - MayAlias = true; +/// object, return the Value for that object. +static void getUnderlyingObjectsForInstr(const MachineInstr *MI, + const MachineFrameInfo *MFI, + SmallVectorImpl > &Objects) { if (!MI->hasOneMemOperand() || !(*MI->memoperands_begin())->getValue() || (*MI->memoperands_begin())->isVolatile()) - return 0; + return; const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) - return 0; - - V = getUnderlyingObject(V); - if (const PseudoSourceValue *PSV = dyn_cast(V)) { - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. 
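// A minimal sketch of the getUnderlyingObjects() rewrite above: instead of
// chasing a single underlying object, a worklist plus a visited set expands
// each value into possibly several base objects and keeps climbing through
// composite values. Value is a hypothetical string stand-in for const Value *,
// and expand() stands in for GetUnderlyingObjects plus the ptrtoint/inttoptr
// climbing done in the patch; the "phi" case is purely illustrative.
#include <cassert>
#include <set>
#include <string>
#include <vector>

typedef std::string Value;   // stand-in for const Value *

static std::vector<Value> expand(const Value &V) {
  if (V == "phi")                     // a phi-like node fans out into two bases
    return {"objA", "objB"};
  return {V};
}

static void getUnderlyingObjects(const Value &Root, std::vector<Value> &Objects) {
  std::set<Value> Visited;
  std::vector<Value> Working(1, Root);
  do {
    Value V = Working.back();
    Working.pop_back();
    for (const Value &O : expand(V)) {
      if (!Visited.insert(O).second)
        continue;                     // already handled
      if (O == "phi")
        Working.push_back(O);         // keep climbing through compositions
      else
        Objects.push_back(O);         // a leaf object: record it
    }
  } while (!Working.empty());
}

int main() {
  std::vector<Value> Objs;
  getUnderlyingObjects("phi", Objs);
  assert(Objs.size() == 2);
  return 0;
}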
- if (PSV->isAliased(MFI)) - return 0; - - MayAlias = PSV->mayAlias(MFI); - return V; - } + return; + + SmallVector Objs; + getUnderlyingObjects(V, Objs); - if (isIdentifiedObject(V)) - return V; + for (SmallVector::iterator I = Objs.begin(), IE = Objs.end(); + I != IE; ++I) { + bool MayAlias = true; + V = *I; + + if (const PseudoSourceValue *PSV = dyn_cast(V)) { + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. - return 0; + if (PSV->isAliased(MFI)) { + Objects.clear(); + return; + } + + MayAlias = PSV->mayAlias(MFI); + } else if (!isIdentifiedObject(V)) { + Objects.clear(); + return; + } + + Objects.push_back(std::make_pair(V, MayAlias)); + } } void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) { @@ -145,20 +168,6 @@ void ScheduleDAGInstrs::finishBlock() { BB = 0; } -/// Initialize the map with the number of registers. -void Reg2SUnitsMap::setRegLimit(unsigned Limit) { - PhysRegSet.setUniverse(Limit); - SUnits.resize(Limit); -} - -/// Clear the map without deallocating storage. -void Reg2SUnitsMap::clear() { - for (const_iterator I = reg_begin(), E = reg_end(); I != E; ++I) { - SUnits[*I].clear(); - } - PhysRegSet.clear(); -} - /// Initialize the DAG and common scheduler state for the current scheduling /// region. This does not actually create the DAG, only clears it. The /// scheduling driver may call BuildSchedGraph multiple times per scheduling @@ -205,10 +214,11 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { if (Reg == 0) continue; if (TRI->isPhysicalRegister(Reg)) - Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); else { assert(!IsPostRA && "Virtual register encountered after regalloc."); - addVRegUseDeps(&ExitSU, i); + if (MO.readsReg()) // ignore undef operands + addVRegUseDeps(&ExitSU, i); } } } else { @@ -221,7 +231,7 @@ void ScheduleDAGInstrs::addSchedBarrierDeps() { E = (*SI)->livein_end(); I != E; ++I) { unsigned Reg = *I; if (!Uses.contains(Reg)) - Uses[Reg].push_back(PhysRegSUOper(&ExitSU, -1)); + Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg)); } } } @@ -239,27 +249,31 @@ void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Uses.contains(*Alias)) continue; - std::vector &UseList = Uses[*Alias]; - for (unsigned i = 0, e = UseList.size(); i != e; ++i) { - SUnit *UseSU = UseList[i].SU; + for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) { + SUnit *UseSU = I->SU; if (UseSU == SU) continue; - SDep dep(SU, SDep::Data, *Alias); - // Adjust the dependence latency using operand def/use information, // then allow the target to perform its own adjustments. - int UseOp = UseList[i].OpIdx; - MachineInstr *RegUse = UseOp < 0 ? 
0 : UseSU->getInstr(); - dep.setLatency( + int UseOp = I->OpIdx; + MachineInstr *RegUse = 0; + SDep Dep; + if (UseOp < 0) + Dep = SDep(SU, SDep::Artificial); + else { + Dep = SDep(SU, SDep::Data, *Alias); + RegUse = UseSU->getInstr(); + Dep.setMinLatency( + SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, + RegUse, UseOp, /*FindMin=*/true)); + } + Dep.setLatency( SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, RegUse, UseOp, /*FindMin=*/false)); - dep.setMinLatency( - SchedModel.computeOperandLatency(SU->getInstr(), OperIdx, - RegUse, UseOp, /*FindMin=*/true)); - ST.adjustSchedDependency(SU, UseSU, dep); - UseSU->addPred(dep); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); } } } @@ -282,9 +296,8 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { Alias.isValid(); ++Alias) { if (!Defs.contains(*Alias)) continue; - std::vector &DefList = Defs[*Alias]; - for (unsigned i = 0, e = DefList.size(); i != e; ++i) { - SUnit *DefSU = DefList[i].SU; + for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) { + SUnit *DefSU = I->SU; if (DefSU == &ExitSU) continue; if (DefSU != SU && @@ -308,33 +321,37 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { // Either insert a new Reg2SUnits entry with an empty SUnits list, or // retrieve the existing SUnits list for this register's uses. // Push this SUnit on the use list. - Uses[MO.getReg()].push_back(PhysRegSUOper(SU, OperIdx)); + Uses.insert(PhysRegSUOper(SU, OperIdx, MO.getReg())); } else { addPhysRegDataDeps(SU, OperIdx); - - // Either insert a new Reg2SUnits entry with an empty SUnits list, or - // retrieve the existing SUnits list for this register's defs. - std::vector &DefList = Defs[MO.getReg()]; + unsigned Reg = MO.getReg(); // clear this register's use list - if (Uses.contains(MO.getReg())) - Uses[MO.getReg()].clear(); - - if (!MO.isDead()) - DefList.clear(); - - // Calls will not be reordered because of chain dependencies (see - // below). Since call operands are dead, calls may continue to be added - // to the DefList making dependence checking quadratic in the size of - // the block. Instead, we leave only one call at the back of the - // DefList. - if (SU->isCall) { - while (!DefList.empty() && DefList.back().SU->isCall) - DefList.pop_back(); + if (Uses.contains(Reg)) + Uses.eraseAll(Reg); + + if (!MO.isDead()) { + Defs.eraseAll(Reg); + } else if (SU->isCall) { + // Calls will not be reordered because of chain dependencies (see + // below). Since call operands are dead, calls may continue to be added + // to the DefList making dependence checking quadratic in the size of + // the block. Instead, we leave only one call at the back of the + // DefList. + Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg); + Reg2SUnitsMap::iterator B = P.first; + Reg2SUnitsMap::iterator I = P.second; + for (bool isBegin = I == B; !isBegin; /* empty */) { + isBegin = (--I) == B; + if (!I->SU->isCall) + break; + I = Defs.erase(I); + } } + // Defs are pushed in the order they are visited and never reordered. 
- DefList.push_back(PhysRegSUOper(SU, OperIdx)); + Defs.insert(PhysRegSUOper(SU, OperIdx, Reg)); } } @@ -445,23 +462,29 @@ static inline bool isUnsafeMemoryObject(MachineInstr *MI, if ((*MI->memoperands_begin())->isVolatile() || MI->hasUnmodeledSideEffects()) return true; - const Value *V = (*MI->memoperands_begin())->getValue(); if (!V) return true; - V = getUnderlyingObject(V); - if (const PseudoSourceValue *PSV = dyn_cast(V)) { - // Similarly to getUnderlyingObjectForInstr: - // For now, ignore PseudoSourceValues which may alias LLVM IR values - // because the code that uses this function has no way to cope with - // such aliases. - if (PSV->isAliased(MFI)) + SmallVector Objs; + getUnderlyingObjects(V, Objs); + for (SmallVector::iterator I = Objs.begin(), + IE = Objs.end(); I != IE; ++I) { + V = *I; + + if (const PseudoSourceValue *PSV = dyn_cast(V)) { + // Similarly to getUnderlyingObjectForInstr: + // For now, ignore PseudoSourceValues which may alias LLVM IR values + // because the code that uses this function has no way to cope with + // such aliases. + if (PSV->isAliased(MFI)) + return true; + } + + // Does this pointer refer to a distinct and identifiable object? + if (!isIdentifiedObject(V)) return true; } - // Does this pointer refer to a distinct and identifiable object? - if (!isIdentifiedObject(V)) - return true; return false; } @@ -680,8 +703,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, // so that they can be given more precise dependencies. We track // separately the known memory locations that may alias and those // that are known not to alias - std::map AliasMemDefs, NonAliasMemDefs; - std::map > AliasMemUses, NonAliasMemUses; + MapVector AliasMemDefs, NonAliasMemDefs; + MapVector > AliasMemUses, NonAliasMemUses; std::set RejectMemNodes; // Remove any stale debug info; sometimes BuildSchedGraph is called again @@ -691,8 +714,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(Defs.empty() && Uses.empty() && "Only BuildGraph should update Defs/Uses"); - Defs.setRegLimit(TRI->getNumRegs()); - Uses.setRegLimit(TRI->getNumRegs()); + Defs.setUniverse(TRI->getNumRegs()); + Uses.setUniverse(TRI->getNumRegs()); assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); // FIXME: Allow SparseSet to reserve space for the creation of virtual @@ -705,17 +728,17 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addSchedBarrierDeps(); // Walk the list of instructions, from bottom moving up. - MachineInstr *PrevMI = NULL; + MachineInstr *DbgMI = NULL; for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin; MII != MIE; --MII) { MachineInstr *MI = prior(MII); - if (MI && PrevMI) { - DbgValues.push_back(std::make_pair(PrevMI, MI)); - PrevMI = NULL; + if (MI && DbgMI) { + DbgValues.push_back(std::make_pair(DbgMI, MI)); + DbgMI = NULL; } if (MI->isDebugValue()) { - PrevMI = MI; + DbgMI = MI; continue; } if (RPTracker) { @@ -723,13 +746,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, assert(RPTracker->getPos() == prior(MII) && "RPTracker can't find MI"); } - assert((!MI->isTerminator() || CanHandleTerminators) && !MI->isLabel() && + assert((CanHandleTerminators || (!MI->isTerminator() && !MI->isLabel())) && "Cannot schedule terminators or labels!"); SUnit *SU = MISUnitMap[MI]; assert(SU && "No SUnit mapped to this MI"); // Add register-based dependencies (data, anti, and output). 
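// A minimal sketch of why the std::map-based memory-dependence tables above
// were switched to llvm::MapVector: iteration follows insertion order rather
// than key/pointer order, so the chain edges added while walking AliasMemDefs
// and AliasMemUses come out the same on every run. The tiny insertion-ordered
// map below is only a stand-in for MapVector, not its real implementation.
#include <cassert>
#include <map>
#include <utility>
#include <vector>

template <typename KeyT, typename ValueT>
class InsertionOrderedMap {
  std::map<KeyT, unsigned> Index;                  // key -> slot in Storage
  std::vector<std::pair<KeyT, ValueT> > Storage;   // keeps insertion order

public:
  ValueT &operator[](const KeyT &K) {
    std::pair<typename std::map<KeyT, unsigned>::iterator, bool> R =
        Index.insert(std::make_pair(K, (unsigned)Storage.size()));
    if (R.second)
      Storage.push_back(std::make_pair(K, ValueT()));
    return Storage[R.first->second].second;
  }
  typename std::vector<std::pair<KeyT, ValueT> >::iterator begin() {
    return Storage.begin();
  }
  typename std::vector<std::pair<KeyT, ValueT> >::iterator end() {
    return Storage.end();
  }
};

int main() {
  InsertionOrderedMap<const void *, int> MemDefs;
  int A, B;
  MemDefs[&B] = 1;            // inserted first, iterated first,
  MemDefs[&A] = 2;            // regardless of how &A and &B compare
  assert(MemDefs.begin()->second == 1);
  return 0;
}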
+ bool HasVRegDef = false; for (unsigned j = 0, n = MI->getNumOperands(); j != n; ++j) { const MachineOperand &MO = MI->getOperand(j); if (!MO.isReg()) continue; @@ -740,12 +764,26 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, addPhysRegDeps(SU, j); else { assert(!IsPostRA && "Virtual register encountered!"); - if (MO.isDef()) + if (MO.isDef()) { + HasVRegDef = true; addVRegDefDeps(SU, j); + } else if (MO.readsReg()) // ignore undef operands addVRegUseDeps(SU, j); } } + // If we haven't seen any uses in this scheduling region, create a + // dependence edge to ExitSU to model the live-out latency. This is required + // for vreg defs with no in-region use, and prefetches with no vreg def. + // + // FIXME: NumDataSuccs would be more precise than NumSuccs here. This + // check currently relies on being called before adding chain deps. + if (SU->NumSuccs == 0 && SU->Latency > 1 + && (HasVRegDef || MI->mayLoad())) { + SDep Dep(SU, SDep::Artificial); + Dep.setLatency(SU->Latency - 1); + ExitSU.addPred(Dep); + } // Add chain dependencies. // Chain dependencies used to enforce memory order should have @@ -760,11 +798,11 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (isGlobalMemoryObject(AA, MI)) { // Be conservative with these and add dependencies on all memory // references, even those that are known to not alias. - for (std::map::iterator I = + for (MapVector::iterator I = NonAliasMemDefs.begin(), E = NonAliasMemDefs.end(); I != E; ++I) { I->second->addPred(SDep(SU, SDep::Barrier)); } - for (std::map >::iterator I = + for (MapVector >::iterator I = NonAliasMemUses.begin(), E = NonAliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) { SDep Dep(SU, SDep::Barrier); @@ -798,10 +836,10 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, TrueMemOrderLatency); - for (std::map::iterator I = AliasMemDefs.begin(), + for (MapVector::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); - for (std::map >::iterator I = + for (MapVector >::iterator I = AliasMemUses.begin(), E = AliasMemUses.end(); I != E; ++I) { for (unsigned i = 0, e = I->second.size(); i != e; ++i) addChainDependency(AA, MFI, SU, I->second[i], RejectMemNodes, @@ -813,60 +851,70 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, AliasMemDefs.clear(); AliasMemUses.clear(); } else if (MI->mayStore()) { - bool MayAlias = true; - if (const Value *V = getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { + SmallVector, 4> Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs); + + if (Objs.empty()) { + // Treat all other stores conservatively. + goto new_alias_chain; + } + + bool MayAlias = false; + for (SmallVector, 4>::iterator + K = Objs.begin(), KE = Objs.end(); K != KE; ++K) { + const Value *V = K->first; + bool ThisMayAlias = K->second; + if (ThisMayAlias) + MayAlias = true; + // A store to a specific PseudoSourceValue. Add precise dependencies. // Record the def in MemDefs, first adding a dep if there is // an existing def. - std::map::iterator I = - ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - std::map::iterator IE = - ((MayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + MapVector::iterator I = + ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + MapVector::iterator IE = + ((ThisMayAlias) ? 
AliasMemDefs.end() : NonAliasMemDefs.end()); if (I != IE) { - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, - 0, true); + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); I->second = SU; } else { - if (MayAlias) + if (ThisMayAlias) AliasMemDefs[V] = SU; else NonAliasMemDefs[V] = SU; } // Handle the uses in MemUses, if there are any. - std::map >::iterator J = - ((MayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); - std::map >::iterator JE = - ((MayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); + MapVector >::iterator J = + ((ThisMayAlias) ? AliasMemUses.find(V) : NonAliasMemUses.find(V)); + MapVector >::iterator JE = + ((ThisMayAlias) ? AliasMemUses.end() : NonAliasMemUses.end()); if (J != JE) { for (unsigned i = 0, e = J->second.size(); i != e; ++i) addChainDependency(AA, MFI, SU, J->second[i], RejectMemNodes, TrueMemOrderLatency, true); J->second.clear(); } - if (MayAlias) { - // Add dependencies from all the PendingLoads, i.e. loads - // with no underlying object. - for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) - addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, - TrueMemOrderLatency); - // Add dependence on alias chain, if needed. - if (AliasChain) - addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); - // But we also should check dependent instructions for the - // SU in question. - adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, - TrueMemOrderLatency); - } - // Add dependence on barrier chain, if needed. - // There is no point to check aliasing on barrier event. Even if - // SU and barrier _could_ be reordered, they should not. In addition, - // we have lost all RejectMemNodes below barrier. - if (BarrierChain) - BarrierChain->addPred(SDep(SU, SDep::Barrier)); - } else { - // Treat all other stores conservatively. - goto new_alias_chain; } + if (MayAlias) { + // Add dependencies from all the PendingLoads, i.e. loads + // with no underlying object. + for (unsigned k = 0, m = PendingLoads.size(); k != m; ++k) + addChainDependency(AA, MFI, SU, PendingLoads[k], RejectMemNodes, + TrueMemOrderLatency); + // Add dependence on alias chain, if needed. + if (AliasChain) + addChainDependency(AA, MFI, SU, AliasChain, RejectMemNodes); + // But we also should check dependent instructions for the + // SU in question. + adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, + TrueMemOrderLatency); + } + // Add dependence on barrier chain, if needed. + // There is no point to check aliasing on barrier event. Even if + // SU and barrier _could_ be reordered, they should not. In addition, + // we have lost all RejectMemNodes below barrier. + if (BarrierChain) + BarrierChain->addPred(SDep(SU, SDep::Barrier)); if (!ExitSU.isPred(SU)) // Push store's up a bit to avoid them getting in between cmp @@ -877,28 +925,41 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, if (MI->isInvariantLoad(AA)) { // Invariant load, no chain dependencies needed! } else { - if (const Value *V = - getUnderlyingObjectForInstr(MI, MFI, MayAlias)) { - // A load from a specific PseudoSourceValue. Add precise dependencies. - std::map::iterator I = - ((MayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); - std::map::iterator IE = - ((MayAlias) ? 
AliasMemDefs.end() : NonAliasMemDefs.end()); - if (I != IE) - addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); - if (MayAlias) - AliasMemUses[V].push_back(SU); - else - NonAliasMemUses[V].push_back(SU); - } else { + SmallVector, 4> Objs; + getUnderlyingObjectsForInstr(MI, MFI, Objs); + + if (Objs.empty()) { // A load with no underlying object. Depend on all // potentially aliasing stores. - for (std::map::iterator I = + for (MapVector::iterator I = AliasMemDefs.begin(), E = AliasMemDefs.end(); I != E; ++I) addChainDependency(AA, MFI, SU, I->second, RejectMemNodes); PendingLoads.push_back(SU); MayAlias = true; + } else { + MayAlias = false; + } + + for (SmallVector, 4>::iterator + J = Objs.begin(), JE = Objs.end(); J != JE; ++J) { + const Value *V = J->first; + bool ThisMayAlias = J->second; + + if (ThisMayAlias) + MayAlias = true; + + // A load from a specific PseudoSourceValue. Add precise dependencies. + MapVector::iterator I = + ((ThisMayAlias) ? AliasMemDefs.find(V) : NonAliasMemDefs.find(V)); + MapVector::iterator IE = + ((ThisMayAlias) ? AliasMemDefs.end() : NonAliasMemDefs.end()); + if (I != IE) + addChainDependency(AA, MFI, SU, I->second, RejectMemNodes, 0, true); + if (ThisMayAlias) + AliasMemUses[V].push_back(SU); + else + NonAliasMemUses[V].push_back(SU); } if (MayAlias) adjustChainDeps(AA, MFI, SU, &ExitSU, RejectMemNodes, /*Latency=*/0); @@ -910,8 +971,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, } } } - if (PrevMI) - FirstDbgValue = PrevMI; + if (DbgMI) + FirstDbgValue = DbgMI; Defs.clear(); Uses.clear(); @@ -933,7 +994,7 @@ std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const { else if (SU == &ExitSU) oss << ""; else - SU->getInstr()->print(oss); + SU->getInstr()->print(oss, &TM, /*SkipOpers=*/true); return oss.str(); } @@ -943,6 +1004,203 @@ std::string ScheduleDAGInstrs::getDAGName() const { return "dag." + BB->getFullName(); } +//===----------------------------------------------------------------------===// +// SchedDFSResult Implementation +//===----------------------------------------------------------------------===// + +namespace llvm { +/// \brief Internal state used to compute SchedDFSResult. +class SchedDFSImpl { + SchedDFSResult &R; + + /// Join DAG nodes into equivalence classes by their subtree. + IntEqClasses SubtreeClasses; + /// List PredSU, SuccSU pairs that represent data edges between subtrees. + std::vector > ConnectionPairs; + + struct RootData { + unsigned NodeID; + unsigned ParentNodeID; // Parent node (member of the parent subtree). + unsigned SubInstrCount; // Instr count in this tree only, not children. + + RootData(unsigned id): NodeID(id), + ParentNodeID(SchedDFSResult::InvalidSubtreeID), + SubInstrCount(0) {} + + unsigned getSparseSetIndex() const { return NodeID; } + }; + + SparseSet RootSet; + +public: + SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) { + RootSet.setUniverse(R.DFSNodeData.size()); + } + + /// Return true if this node been visited by the DFS traversal. + /// + /// During visitPostorderNode the Node's SubtreeID is assigned to the Node + /// ID. Later, SubtreeID is updated but remains valid. + bool isVisited(const SUnit *SU) const { + return R.DFSNodeData[SU->NodeNum].SubtreeID + != SchedDFSResult::InvalidSubtreeID; + } + + /// Initialize this node's instruction count. We don't need to flag the node + /// visited until visitPostorder because the DAG cannot have cycles. 
+ void visitPreorder(const SUnit *SU) { + R.DFSNodeData[SU->NodeNum].InstrCount = + SU->getInstr()->isTransient() ? 0 : 1; + } + + /// Called once for each node after all predecessors are visited. Revisit this + /// node's predecessors and potentially join them now that we know the ILP of + /// the other predecessors. + void visitPostorderNode(const SUnit *SU) { + // Mark this node as the root of a subtree. It may be joined with its + // successors later. + R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum; + RootData RData(SU->NodeNum); + RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1; + + // If any predecessors are still in their own subtree, they either cannot be + // joined or are large enough to remain separate. If this parent node's + // total instruction count is not greater than a child subtree by at least + // the subtree limit, then try to join it now since splitting subtrees is + // only useful if multiple high-pressure paths are possible. + unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount; + for (SUnit::const_pred_iterator + PI = SU->Preds.begin(), PE = SU->Preds.end(); PI != PE; ++PI) { + if (PI->getKind() != SDep::Data) + continue; + unsigned PredNum = PI->getSUnit()->NodeNum; + if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit) + joinPredSubtree(*PI, SU, /*CheckLimit=*/false); + + // Either link or merge the TreeData entry from the child to the parent. + if (R.DFSNodeData[PredNum].SubtreeID == PredNum) { + // If the predecessor's parent is invalid, this is a tree edge and the + // current node is the parent. + if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID) + RootSet[PredNum].ParentNodeID = SU->NodeNum; + } + else if (RootSet.count(PredNum)) { + // The predecessor is not a root, but is still in the root set. This + // must be the new parent that it was just joined to. Note that + // RootSet[PredNum].ParentNodeID may either be invalid or may still be + // set to the original parent. + RData.SubInstrCount += RootSet[PredNum].SubInstrCount; + RootSet.erase(PredNum); + } + } + RootSet[SU->NodeNum] = RData; + } + + /// Called once for each tree edge after calling visitPostOrderNode on the + /// predecessor. Increment the parent node's instruction count and + /// preemptively join this subtree to its parent's if it is small enough. + void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) { + R.DFSNodeData[Succ->NodeNum].InstrCount + += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount; + joinPredSubtree(PredDep, Succ); + } + + /// Add a connection for cross edges. + void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) { + ConnectionPairs.push_back(std::make_pair(PredDep.getSUnit(), Succ)); + } + + /// Set each node's subtree ID to the representative ID and record connections + /// between trees. + void finalize() { + SubtreeClasses.compress(); + R.DFSTreeData.resize(SubtreeClasses.getNumClasses()); + assert(SubtreeClasses.getNumClasses() == RootSet.size() + && "number of roots should match trees"); + for (SparseSet::const_iterator + RI = RootSet.begin(), RE = RootSet.end(); RI != RE; ++RI) { + unsigned TreeID = SubtreeClasses[RI->NodeID]; + if (RI->ParentNodeID != SchedDFSResult::InvalidSubtreeID) + R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[RI->ParentNodeID]; + R.DFSTreeData[TreeID].SubInstrCount = RI->SubInstrCount; + // Note that SubInstrCount may be greater than InstrCount if we joined + // subtrees across a cross edge. 
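// A minimal sketch of the equivalence-class joining SchedDFSImpl performs
// above with llvm::IntEqClasses: joinPredSubtree() unions a predecessor's node
// into its DFS parent's class, and finalize() later reads back one
// representative per subtree. The tiny union-find below stands in for
// IntEqClasses; it is not that class's real implementation.
#include <cassert>
#include <numeric>
#include <vector>

class EqClasses {
  std::vector<unsigned> Parent;

public:
  explicit EqClasses(unsigned N) : Parent(N) {
    std::iota(Parent.begin(), Parent.end(), 0u);   // every node is its own class
  }
  unsigned find(unsigned X) {
    while (Parent[X] != X)
      X = Parent[X] = Parent[Parent[X]];           // path halving
    return X;
  }
  void join(unsigned A, unsigned B) { Parent[find(A)] = find(B); }
};

int main() {
  EqClasses Subtrees(4);
  Subtrees.join(1, 0);          // pred SU(1) joins its DFS parent SU(0)
  Subtrees.join(2, 1);          // and SU(2) joins the same subtree via SU(1)
  assert(Subtrees.find(2) == Subtrees.find(0));
  assert(Subtrees.find(3) != Subtrees.find(0));
  return 0;
}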
InstrCount will be attributed to the + // original parent, while SubInstrCount will be attributed to the joined + // parent. + } + R.SubtreeConnections.resize(SubtreeClasses.getNumClasses()); + R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses()); + DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n"); + for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) { + R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx]; + DEBUG(dbgs() << " SU(" << Idx << ") in tree " + << R.DFSNodeData[Idx].SubtreeID << '\n'); + } + for (std::vector >::const_iterator + I = ConnectionPairs.begin(), E = ConnectionPairs.end(); + I != E; ++I) { + unsigned PredTree = SubtreeClasses[I->first->NodeNum]; + unsigned SuccTree = SubtreeClasses[I->second->NodeNum]; + if (PredTree == SuccTree) + continue; + unsigned Depth = I->first->getDepth(); + addConnection(PredTree, SuccTree, Depth); + addConnection(SuccTree, PredTree, Depth); + } + } + +protected: + /// Join the predecessor subtree with the successor that is its DFS + /// parent. Apply some heuristics before joining. + bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, + bool CheckLimit = true) { + assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges"); + + // Check if the predecessor is already joined. + const SUnit *PredSU = PredDep.getSUnit(); + unsigned PredNum = PredSU->NodeNum; + if (R.DFSNodeData[PredNum].SubtreeID != PredNum) + return false; + + // Four is the magic number of successors before a node is considered a + // pinch point. + unsigned NumDataSucs = 0; + for (SUnit::const_succ_iterator SI = PredSU->Succs.begin(), + SE = PredSU->Succs.end(); SI != SE; ++SI) { + if (SI->getKind() == SDep::Data) { + if (++NumDataSucs >= 4) + return false; + } + } + if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit) + return false; + R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum; + SubtreeClasses.join(Succ->NodeNum, PredNum); + return true; + } + + /// Called by finalize() to record a connection between trees. + void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) { + if (!Depth) + return; + + do { + SmallVectorImpl &Connections = + R.SubtreeConnections[FromTree]; + for (SmallVectorImpl::iterator + I = Connections.begin(), E = Connections.end(); I != E; ++I) { + if (I->TreeID == ToTree) { + I->Level = std::max(I->Level, Depth); + return; + } + } + Connections.push_back(SchedDFSResult::Connection(ToTree, Depth)); + FromTree = R.DFSTreeData[FromTree].ParentTreeID; + } while (FromTree != SchedDFSResult::InvalidSubtreeID); + } +}; +} // namespace llvm + namespace { /// \brief Manage the stack used by a reverse depth-first search over the DAG. class SchedDAGReverseDFS { @@ -955,7 +1213,10 @@ public: } void advance() { ++DFSStack.back().second; } - void backtrack() { DFSStack.pop_back(); } + const SDep *backtrack() { + DFSStack.pop_back(); + return DFSStack.empty() ? 0 : llvm::prior(DFSStack.back().second); + } const SUnit *getCurr() const { return DFSStack.back().first; } @@ -967,57 +1228,83 @@ public: }; } // anonymous -void ScheduleDAGILP::resize(unsigned NumSUnits) { - ILPValues.resize(NumSUnits); -} - -ILPValue ScheduleDAGILP::getILP(const SUnit *SU) { - return ILPValues[SU->NodeNum]; -} - -// A leaf node has an ILP of 1/1. -static ILPValue initILP(const SUnit *SU) { - unsigned Cnt = SU->getInstr()->isTransient() ? 
-  return ILPValue(Cnt, 1 + SU->getDepth());
+static bool hasDataSucc(const SUnit *SU) {
+  for (SUnit::const_succ_iterator
+         SI = SU->Succs.begin(), SE = SU->Succs.end(); SI != SE; ++SI) {
+    if (SI->getKind() == SDep::Data && !SI->getSUnit()->isBoundaryNode())
+      return true;
+  }
+  return false;
 }

 /// Compute an ILP metric for all nodes in the subDAG reachable via depth-first
 /// search from this root.
-void ScheduleDAGILP::computeILP(const SUnit *Root) {
+void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
   if (!IsBottomUp)
     llvm_unreachable("Top-down ILP metric is unimplemented");

-  SchedDAGReverseDFS DFS;
-  // Mark a node visited by validating it.
-  ILPValues[Root->NodeNum] = initILP(Root);
-  DFS.follow(Root);
-  for (;;) {
-    // Traverse the leftmost path as far as possible.
-    while (DFS.getPred() != DFS.getPredEnd()) {
-      const SUnit *PredSU = DFS.getPred()->getSUnit();
-      DFS.advance();
-      // If the pred is already valid, skip it.
-      if (ILPValues[PredSU->NodeNum].isValid())
-        continue;
-      ILPValues[PredSU->NodeNum] = initILP(PredSU);
-      DFS.follow(PredSU);
+  SchedDFSImpl Impl(*this);
+  for (ArrayRef<SUnit>::const_iterator
+         SI = SUnits.begin(), SE = SUnits.end(); SI != SE; ++SI) {
+    const SUnit *SU = &*SI;
+    if (Impl.isVisited(SU) || hasDataSucc(SU))
+      continue;
+
+    SchedDAGReverseDFS DFS;
+    Impl.visitPreorder(SU);
+    DFS.follow(SU);
+    for (;;) {
+      // Traverse the leftmost path as far as possible.
+      while (DFS.getPred() != DFS.getPredEnd()) {
+        const SDep &PredDep = *DFS.getPred();
+        DFS.advance();
+        // Ignore non-data edges.
+        if (PredDep.getKind() != SDep::Data
+            || PredDep.getSUnit()->isBoundaryNode()) {
+          continue;
+        }
+        // An already visited edge is a cross edge, assuming an acyclic DAG.
+        if (Impl.isVisited(PredDep.getSUnit())) {
+          Impl.visitCrossEdge(PredDep, DFS.getCurr());
+          continue;
+        }
+        Impl.visitPreorder(PredDep.getSUnit());
+        DFS.follow(PredDep.getSUnit());
+      }
+      // Visit the top of the stack in postorder and backtrack.
+      const SUnit *Child = DFS.getCurr();
+      const SDep *PredDep = DFS.backtrack();
+      Impl.visitPostorderNode(Child);
+      if (PredDep)
+        Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
+      if (DFS.isComplete())
+        break;
     }
-    // Visit the top of the stack in postorder and backtrack.
-    unsigned PredCount = ILPValues[DFS.getCurr()->NodeNum].InstrCount;
-    DFS.backtrack();
-    if (DFS.isComplete())
-      break;
-    // Add the recently finished predecessor's bottom-up descendent count.
-    ILPValues[DFS.getCurr()->NodeNum].InstrCount += PredCount;
+  }
+  Impl.finalize();
+}
+
+/// The root of the given SubtreeID was just scheduled. For all subtrees
+/// connected to this tree, record the depth of the connection so that the
+/// nearest connected subtrees can be prioritized.
+void SchedDFSResult::scheduleTree(unsigned SubtreeID) { + for (SmallVectorImpl::const_iterator + I = SubtreeConnections[SubtreeID].begin(), + E = SubtreeConnections[SubtreeID].end(); I != E; ++I) { + SubtreeConnectLevels[I->TreeID] = + std::max(SubtreeConnectLevels[I->TreeID], I->Level); + DEBUG(dbgs() << " Tree: " << I->TreeID + << " @" << SubtreeConnectLevels[I->TreeID] << '\n'); } } #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void ILPValue::print(raw_ostream &OS) const { - if (!isValid()) + OS << InstrCount << " / " << Length << " = "; + if (!Length) OS << "BADILP"; - OS << InstrCount << " / " << Cycles << " = " - << format("%g", ((double)InstrCount / Cycles)); + else + OS << format("%g", ((double)InstrCount / Length)); } void ILPValue::dump() const { diff --git a/lib/CodeGen/ScheduleDAGPrinter.cpp b/lib/CodeGen/ScheduleDAGPrinter.cpp index 6e781b199a5f..8ddb3e892f25 100644 --- a/lib/CodeGen/ScheduleDAGPrinter.cpp +++ b/lib/CodeGen/ScheduleDAGPrinter.cpp @@ -11,19 +11,19 @@ // //===----------------------------------------------------------------------===// -#include "llvm/Constants.h" -#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/ScheduleDAG.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -41,6 +41,10 @@ namespace llvm { return true; } + static bool isNodeHidden(const SUnit *Node) { + return (Node->NumPreds > 10 || Node->NumSuccs > 10); + } + static bool hasNodeAddressLabel(const SUnit *Node, const ScheduleDAG *Graph) { return true; diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 37d7731aa158..eb1609575016 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -18,22 +18,23 @@ #define DEBUG_TYPE "dagcombine" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" -#include "llvm/CodeGen/MachineFunction.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include using namespace llvm; @@ -291,6 +292,10 @@ namespace { unsigned SrcValueAlign2, const MDNode *TBAAInfo2) const; + /// isAlias - Return true if there is any 
possibility that the two addresses + /// overlap. + bool isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1); + /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool FindAliasInfo(SDNode *N, @@ -1178,7 +1183,7 @@ SDValue DAGCombiner::combine(SDNode *N) { // Expose the DAG combiner to the target combiner impls. TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); + DagCombineInfo(DAG, Level, false, this); RV = TLI.PerformDAGCombine(N, DagCombineInfo); } @@ -1377,6 +1382,12 @@ SDValue DAGCombiner::visitADD(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (add x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; } // fold (add x, undef) -> undef @@ -1620,6 +1631,10 @@ SDValue DAGCombiner::visitSUB(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (sub x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; } // fold (sub x, x) -> 0 @@ -2423,6 +2438,18 @@ SDValue DAGCombiner::visitAND(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (and x, 0) -> 0, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N0; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N1; + + // fold (and x, -1) -> x, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllOnes(N1.getNode())) + return N0; } // fold (and x, undef) -> 0 @@ -2606,7 +2633,10 @@ SDValue DAGCombiner::visitAND(SDNode *N) { bool isInteger = LL.getValueType().isInteger(); ISD::CondCode Result = ISD::getSetCCAndOperation(Op0, Op1, isInteger); if (Result != ISD::SETCC_INVALID && - (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getSimpleValueType()))))) return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), LL, LR, Result); } @@ -2766,7 +2796,6 @@ SDValue DAGCombiner::visitAND(SDNode *N) { } } } - return SDValue(); } @@ -2959,7 +2988,8 @@ SDValue DAGCombiner::MatchBSwapHWord(SDNode *N, SDValue N0, SDValue N1) { SDValue N00 = N0.getOperand(0); SDValue N01 = N0.getOperand(1); - if (N1.getOpcode() == ISD::OR) { + if (N1.getOpcode() == ISD::OR && + N00.getNumOperands() == 2 && N01.getNumOperands() == 2) { // (or (or (and), (and)), (or (and), (and))) SDValue N000 = N00.getOperand(0); if (!isBSwapHWordElement(N000, Parts)) @@ -3021,6 +3051,18 @@ SDValue DAGCombiner::visitOR(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (or x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; + + // fold (or x, -1) -> -1, vector edition + if (ISD::isBuildVectorAllOnes(N0.getNode())) + return N0; + if (ISD::isBuildVectorAllOnes(N1.getNode())) + return N1; } // fold (or x, undef) -> -1 @@ -3103,7 +3145,10 @@ SDValue DAGCombiner::visitOR(SDNode *N) { bool isInteger = LL.getValueType().isInteger(); ISD::CondCode Result = ISD::getSetCCOrOperation(Op0, Op1, isInteger); if (Result != 
ISD::SETCC_INVALID && - (!LegalOperations || TLI.isCondCodeLegal(Result, LL.getValueType()))) + (!LegalOperations || + (TLI.isCondCodeLegal(Result, LL.getSimpleValueType()) && + TLI.isOperationLegal(ISD::SETCC, + TLI.getSetCCResultType(N0.getValueType()))))) return DAG.getSetCC(N->getDebugLoc(), N0.getValueType(), LL, LR, Result); } @@ -3330,6 +3375,12 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { if (VT.isVector()) { SDValue FoldedVOp = SimplifyVBinOp(N); if (FoldedVOp.getNode()) return FoldedVOp; + + // fold (xor x, 0) -> x, vector edition + if (ISD::isBuildVectorAllZeros(N0.getNode())) + return N1; + if (ISD::isBuildVectorAllZeros(N1.getNode())) + return N0; } // fold (xor undef, undef) -> 0. This is a common idiom (misuse). @@ -3360,7 +3411,8 @@ SDValue DAGCombiner::visitXOR(SDNode *N) { ISD::CondCode NotCC = ISD::getSetCCInverse(cast(CC)->get(), isInt); - if (!LegalOperations || TLI.isCondCodeLegal(NotCC, LHS.getValueType())) { + if (!LegalOperations || + TLI.isCondCodeLegal(NotCC, LHS.getSimpleValueType())) { switch (N0.getOpcode()) { default: llvm_unreachable("Unhandled SetCC Equivalent!"); @@ -4444,8 +4496,8 @@ SDValue DAGCombiner::visitSIGN_EXTEND(SDNode *N) { NegOne, DAG.getConstant(0, VT), cast(N0.getOperand(2))->get(), true); if (SCC.getNode()) return SCC; - if (!LegalOperations || - TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT))) + if (!VT.isVector() && (!LegalOperations || + TLI.isOperationLegal(ISD::SETCC, TLI.getSetCCResultType(VT)))) return DAG.getNode(ISD::SELECT, N->getDebugLoc(), VT, DAG.getSetCC(N->getDebugLoc(), TLI.getSetCCResultType(VT), @@ -5025,11 +5077,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // At this point, we must have a load or else we can't do the transform. if (!isa(N0)) return SDValue(); + // Because a SRL must be assumed to *need* to zero-extend the high bits + // (as opposed to anyext the high bits), we can't combine the zextload + // lowering of SRL and an sextload. + if (cast(N0)->getExtensionType() == ISD::SEXTLOAD) + return SDValue(); + // If the shift amount is larger than the input type then we're not // accessing any of the loaded bytes. If the load was a zextload/extload // then the result of the shift+trunc is zero/undef (handled elsewhere). - // If the load was a sextload then the result is a splat of the sign bit - // of the extended byte. This is not worth optimizing for. if (ShAmt >= cast(N0)->getMemoryVT().getSizeInBits()) return SDValue(); } @@ -5048,16 +5104,26 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { // If we haven't found a load, we can't narrow it. Don't transform one with // multiple uses, this would require adding a new load. - if (!isa(N0) || !N0.hasOneUse() || - // Don't change the width of a volatile load. - cast(N0)->isVolatile()) + if (!isa(N0) || !N0.hasOneUse()) + return SDValue(); + + // Don't change the width of a volatile load. + LoadSDNode *LN0 = cast(N0); + if (LN0->isVolatile()) return SDValue(); // Verify that we are actually reducing a load width here. - if (cast(N0)->getMemoryVT().getSizeInBits() < EVTBits) + if (LN0->getMemoryVT().getSizeInBits() < EVTBits) + return SDValue(); + + // For the transform to be legal, the load must produce only two values + // (the value loaded and the chain). Don't transform a pre-increment + // load, for example, which produces an extra value. Otherwise the + // transformation is not equivalent, and the downstream logic to replace + // uses gets things wrong. 
+ if (LN0->getNumValues() > 2) return SDValue(); - LoadSDNode *LN0 = cast(N0); EVT PtrType = N0.getOperand(1).getValueType(); if (PtrType == MVT::Untyped || PtrType.isExtended()) @@ -5101,8 +5167,15 @@ SDValue DAGCombiner::ReduceLoadWidth(SDNode *N) { EVT ShImmTy = getShiftAmountTy(Result.getValueType()); if (!isUIntN(ShImmTy.getSizeInBits(), ShLeftAmt)) ShImmTy = VT; - Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, - Result, DAG.getConstant(ShLeftAmt, ShImmTy)); + // If the shift amount is as large as the result size (but, presumably, + // no larger than the source) then the useful bits of the result are + // zero; we can't simply return the shortened shift, because the result + // of that operation is undefined. + if (ShLeftAmt >= VT.getSizeInBits()) + Result = DAG.getConstant(0, VT); + else + Result = DAG.getNode(ISD::SHL, N0.getDebugLoc(), VT, + Result, DAG.getConstant(ShLeftAmt, ShImmTy)); } // Return the new loaded value. @@ -5187,6 +5260,7 @@ SDValue DAGCombiner::visitSIGN_EXTEND_INREG(SDNode *N) { LN0->getAlignment()); CombineTo(N, ExtLoad); CombineTo(N0.getNode(), ExtLoad, ExtLoad.getValue(1)); + AddToWorkList(ExtLoad.getNode()); return SDValue(N, 0); // Return N so it doesn't get rechecked! } // fold (sext_inreg (zextload x)) -> (sextload x) iff load has one use @@ -5287,6 +5361,38 @@ SDValue DAGCombiner::visitTRUNCATE(SDNode *N) { } } + // Fold a series of buildvector, bitcast, and truncate if possible. + // For example fold + // (2xi32 trunc (bitcast ((4xi32)buildvector x, x, y, y) 2xi64)) to + // (2xi32 (buildvector x, y)). + if (Level == AfterLegalizeVectorOps && VT.isVector() && + N0.getOpcode() == ISD::BITCAST && N0.hasOneUse() && + N0.getOperand(0).getOpcode() == ISD::BUILD_VECTOR && + N0.getOperand(0).hasOneUse()) { + + SDValue BuildVect = N0.getOperand(0); + EVT BuildVectEltTy = BuildVect.getValueType().getVectorElementType(); + EVT TruncVecEltTy = VT.getVectorElementType(); + + // Check that the element types match. + if (BuildVectEltTy == TruncVecEltTy) { + // Now we only need to compute the offset of the truncated elements. + unsigned BuildVecNumElts = BuildVect.getNumOperands(); + unsigned TruncVecNumElts = VT.getVectorNumElements(); + unsigned TruncEltOffset = BuildVecNumElts / TruncVecNumElts; + + assert((BuildVecNumElts % TruncVecNumElts) == 0 && + "Invalid number of elements"); + + SmallVector Opnds; + for (unsigned i = 0, e = BuildVecNumElts; i != e; i += TruncEltOffset) + Opnds.push_back(BuildVect.getOperand(i)); + + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), VT, &Opnds[0], + Opnds.size()); + } + } + // See if we can simplify the input to this truncate through knowledge that // only the low bits are being used. // For example "trunc (or (shl x, 8), y)" // -> trunc y @@ -5729,14 +5835,25 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { DAG.getNode(ISD::FADD, N->getDebugLoc(), VT, N0.getOperand(1), N1)); + // No FP constant should be created after legalization as Instruction + // Selection pass has hard time in dealing with FP constant. + // + // We don't need test this condition for transformation like following, as + // the DAG being transformed implies it is legal to take FP constant as + // operand. 
+ // + // (fadd (fmul c, x), x) -> (fmul c+1, x) + // + bool AllowNewFpConst = (Level < AfterLegalizeDAG); + // If allow, fold (fadd (fneg x), x) -> 0.0 - if (DAG.getTarget().Options.UnsafeFPMath && + if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && N0.getOpcode() == ISD::FNEG && N0.getOperand(0) == N1) { return DAG.getConstantFP(0.0, VT); } // If allow, fold (fadd x, (fneg x)) -> 0.0 - if (DAG.getTarget().Options.UnsafeFPMath && + if (AllowNewFpConst && DAG.getTarget().Options.UnsafeFPMath && N1.getOpcode() == ISD::FNEG && N1.getOperand(0) == N0) { return DAG.getConstantFP(0.0, VT); } @@ -5769,13 +5886,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N1, NewCFP); } - // (fadd (fadd x, x), x) -> (fmul 3.0, x) - if (!CFP00 && !CFP01 && N0.getOperand(0) == N0.getOperand(1) && - N0.getOperand(0) == N1) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N1, DAG.getConstantFP(3.0, VT)); - } - // (fadd (fmul c, x), (fadd x, x)) -> (fmul c+2, x) if (CFP00 && !CFP01 && N1.getOpcode() == ISD::FADD && N1.getOperand(0) == N1.getOperand(1) && @@ -5821,12 +5931,6 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { N0, NewCFP); } - // (fadd x, (fadd x, x)) -> (fmul 3.0, x) - if (!CFP10 && !CFP11 && N1.getOperand(0) == N1.getOperand(1) && - N1.getOperand(0) == N0) { - return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, - N0, DAG.getConstantFP(3.0, VT)); - } // (fadd (fadd x, x), (fmul c, x)) -> (fmul c+2, x) if (CFP10 && !CFP11 && N1.getOpcode() == ISD::FADD && @@ -5851,8 +5955,29 @@ SDValue DAGCombiner::visitFADD(SDNode *N) { } } + if (N0.getOpcode() == ISD::FADD && AllowNewFpConst) { + ConstantFPSDNode *CFP = dyn_cast(N0.getOperand(0)); + // (fadd (fadd x, x), x) -> (fmul 3.0, x) + if (!CFP && N0.getOperand(0) == N0.getOperand(1) && + (N0.getOperand(0) == N1)) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N1, DAG.getConstantFP(3.0, VT)); + } + } + + if (N1.getOpcode() == ISD::FADD && AllowNewFpConst) { + ConstantFPSDNode *CFP10 = dyn_cast(N1.getOperand(0)); + // (fadd x, (fadd x, x)) -> (fmul 3.0, x) + if (!CFP10 && N1.getOperand(0) == N1.getOperand(1) && + N1.getOperand(0) == N0) { + return DAG.getNode(ISD::FMUL, N->getDebugLoc(), VT, + N0, DAG.getConstantFP(3.0, VT)); + } + } + // (fadd (fadd x, x), (fadd x, x)) -> (fmul 4.0, x) - if (N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && + if (AllowNewFpConst && + N0.getOpcode() == ISD::FADD && N1.getOpcode() == ISD::FADD && N0.getOperand(0) == N0.getOperand(1) && N1.getOperand(0) == N1.getOperand(1) && N0.getOperand(0) == N1.getOperand(0)) { @@ -6596,7 +6721,8 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { // fold a brcond with a setcc condition into a BR_CC node if BR_CC is legal // on the target. if (N1.getOpcode() == ISD::SETCC && - TLI.isOperationLegalOrCustom(ISD::BR_CC, MVT::Other)) { + TLI.isOperationLegalOrCustom(ISD::BR_CC, + N1.getOperand(0).getValueType())) { return DAG.getNode(ISD::BR_CC, N->getDebugLoc(), MVT::Other, Chain, N1.getOperand(2), N1.getOperand(0), N1.getOperand(1), N2); @@ -6682,18 +6808,24 @@ SDValue DAGCombiner::visitBRCOND(SDNode *N) { if (Op0.getOpcode() == Op1.getOpcode()) { // Avoid missing important xor optimizations. 
SDValue Tmp = visitXOR(TheXor); - if (Tmp.getNode() && Tmp.getNode() != TheXor) { - DEBUG(dbgs() << "\nReplacing.8 "; - TheXor->dump(&DAG); - dbgs() << "\nWith: "; - Tmp.getNode()->dump(&DAG); - dbgs() << '\n'); - WorkListRemover DeadNodes(*this); - DAG.ReplaceAllUsesOfValueWith(N1, Tmp); - removeFromWorkList(TheXor); - DAG.DeleteNode(TheXor); - return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), - MVT::Other, Chain, Tmp, N2); + if (Tmp.getNode()) { + if (Tmp.getNode() != TheXor) { + DEBUG(dbgs() << "\nReplacing.8 "; + TheXor->dump(&DAG); + dbgs() << "\nWith: "; + Tmp.getNode()->dump(&DAG); + dbgs() << '\n'); + WorkListRemover DeadNodes(*this); + DAG.ReplaceAllUsesOfValueWith(N1, Tmp); + removeFromWorkList(TheXor); + DAG.DeleteNode(TheXor); + return DAG.getNode(ISD::BRCOND, N->getDebugLoc(), + MVT::Other, Chain, Tmp, N2); + } + + // visitXOR has changed XOR's operands or replaced the XOR completely, + // bail out. + return SDValue(N, 0); } } @@ -6772,7 +6904,7 @@ static bool canFoldInAddressingMode(SDNode *N, SDNode *Use, } else return false; - AddrMode AM; + TargetLowering::AddrMode AM; if (N->getOpcode() == ISD::ADD) { ConstantSDNode *Offset = dyn_cast(N->getOperand(1)); if (Offset) @@ -6841,6 +6973,16 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { ISD::MemIndexedMode AM = ISD::UNINDEXED; if (!TLI.getPreIndexedAddressParts(N, BasePtr, Offset, AM, DAG)) return false; + + // Backends without true r+i pre-indexed forms may need to pass a + // constant base with a variable offset so that constant coercion + // will work with the patterns in canonical form. + bool Swapped = false; + if (isa(BasePtr)) { + std::swap(BasePtr, Offset); + Swapped = true; + } + // Don't create a indexed load / store with zero offset. if (isa(Offset) && cast(Offset)->isNullValue()) @@ -6866,6 +7008,48 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { return false; } + // If the offset is a constant, there may be other adds of constants that + // can be folded with this one. We should do this to avoid having to keep + // a copy of the original base pointer. + SmallVector OtherUses; + if (isa(Offset)) + for (SDNode::use_iterator I = BasePtr.getNode()->use_begin(), + E = BasePtr.getNode()->use_end(); I != E; ++I) { + SDNode *Use = *I; + if (Use == Ptr.getNode()) + continue; + + if (Use->isPredecessorOf(N)) + continue; + + if (Use->getOpcode() != ISD::ADD && Use->getOpcode() != ISD::SUB) { + OtherUses.clear(); + break; + } + + SDValue Op0 = Use->getOperand(0), Op1 = Use->getOperand(1); + if (Op1.getNode() == BasePtr.getNode()) + std::swap(Op0, Op1); + assert(Op0.getNode() == BasePtr.getNode() && + "Use of ADD/SUB but not an operand"); + + if (!isa(Op1)) { + OtherUses.clear(); + break; + } + + // FIXME: In some cases, we can be smarter about this. + if (Op1.getValueType() != Offset.getValueType()) { + OtherUses.clear(); + break; + } + + OtherUses.push_back(Use); + } + + if (Swapped) + std::swap(BasePtr, Offset); + // Now check for #3 and #4. bool RealUse = false; @@ -6915,6 +7099,43 @@ bool DAGCombiner::CombineToPreIndexedLoadStore(SDNode *N) { // Finally, since the node is now dead, remove it from the graph. 
DAG.DeleteNode(N); + if (Swapped) + std::swap(BasePtr, Offset); + + // Replace other uses of BasePtr that can be updated to use Ptr + for (unsigned i = 0, e = OtherUses.size(); i != e; ++i) { + unsigned OffsetIdx = 1; + if (OtherUses[i]->getOperand(OffsetIdx).getNode() == BasePtr.getNode()) + OffsetIdx = 0; + assert(OtherUses[i]->getOperand(!OffsetIdx).getNode() == + BasePtr.getNode() && "Expected BasePtr operand"); + + APInt OV = + cast(Offset)->getAPIntValue(); + if (AM == ISD::PRE_DEC) + OV = -OV; + + ConstantSDNode *CN = + cast(OtherUses[i]->getOperand(OffsetIdx)); + APInt CNV = CN->getAPIntValue(); + if (OtherUses[i]->getOpcode() == ISD::SUB && OffsetIdx == 1) + CNV += OV; + else + CNV -= OV; + + SDValue NewOp1 = Result.getValue(isLoad ? 1 : 0); + SDValue NewOp2 = DAG.getConstant(CNV, CN->getValueType(0)); + if (OffsetIdx == 0) + std::swap(NewOp1, NewOp2); + + SDValue NewUse = DAG.getNode(OtherUses[i]->getOpcode(), + OtherUses[i]->getDebugLoc(), + OtherUses[i]->getValueType(0), NewOp1, NewOp2); + DAG.ReplaceAllUsesOfValueWith(SDValue(OtherUses[i], 0), NewUse); + removeFromWorkList(OtherUses[i]); + DAG.DeleteNode(OtherUses[i]); + } + // Replace the uses of Ptr with uses of the updated base value. DAG.ReplaceAllUsesOfValueWith(Ptr, Result.getValue(isLoad ? 1 : 0)); removeFromWorkList(Ptr.getNode()); @@ -7123,12 +7344,15 @@ SDValue DAGCombiner::visitLOAD(SDNode *N) { // Try to infer better alignment information than the load already has. if (OptLevel != CodeGenOpt::None && LD->isUnindexed()) { if (unsigned Align = DAG.InferPtrAlignment(Ptr)) { - if (Align > LD->getAlignment()) - return DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), + if (Align > LD->getMemOperand()->getBaseAlignment()) { + SDValue NewLoad = + DAG.getExtLoad(LD->getExtensionType(), N->getDebugLoc(), LD->getValueType(0), Chain, Ptr, LD->getPointerInfo(), LD->getMemoryVT(), LD->isVolatile(), LD->isNonTemporal(), Align); + return CombineTo(N, NewLoad, SDValue(NewLoad.getNode(), 1), true); + } } } @@ -7386,7 +7610,8 @@ SDValue DAGCombiner::ReduceLoadOpStoreWidth(SDNode *N) { // start at the previous one. if (ShAmt % NewBW) ShAmt = (((ShAmt + NewBW - 1) / NewBW) * NewBW) - NewBW; - APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, ShAmt + NewBW); + APInt Mask = APInt::getBitsSet(BitWidth, ShAmt, + std::min(BitWidth, ShAmt + NewBW)); if ((Imm & Mask) == Imm) { APInt NewImm = (Imm & Mask).lshr(ShAmt).trunc(NewBW); if (Opc == ISD::AND) @@ -7486,16 +7711,82 @@ SDValue DAGCombiner::TransformFPLoadStorePair(SDNode *N) { return SDValue(); } -/// Returns the base pointer and an integer offset from that object. -static std::pair GetPointerBaseAndOffset(SDValue Ptr) { - if (Ptr->getOpcode() == ISD::ADD && isa(Ptr->getOperand(1))) { - int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); - SDValue Base = Ptr->getOperand(0); - return std::make_pair(Base, Offset); +/// Helper struct to parse and store a memory address as base + index + offset. +/// We ignore sign extensions when it is safe to do so. +/// The following two expressions are not equivalent. To differentiate we need +/// to store whether there was a sign extension involved in the index +/// computation. 
+/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (add (i8 load %index) +/// (i8 1)))) +/// vs +/// +/// (load (i64 add (i64 copyfromreg %c) +/// (i64 signextend (i32 add (i32 signextend (i8 load %index)) +/// (i32 1))))) +struct BaseIndexOffset { + SDValue Base; + SDValue Index; + int64_t Offset; + bool IsIndexSignExt; + + BaseIndexOffset() : Offset(0), IsIndexSignExt(false) {} + + BaseIndexOffset(SDValue Base, SDValue Index, int64_t Offset, + bool IsIndexSignExt) : + Base(Base), Index(Index), Offset(Offset), IsIndexSignExt(IsIndexSignExt) {} + + bool equalBaseIndex(const BaseIndexOffset &Other) { + return Other.Base == Base && Other.Index == Index && + Other.IsIndexSignExt == IsIndexSignExt; } - return std::make_pair(Ptr, 0); -} + /// Parses tree in Ptr for base, index, offset addresses. + static BaseIndexOffset match(SDValue Ptr) { + bool IsIndexSignExt = false; + + // Just Base or possibly anything else. + if (Ptr->getOpcode() != ISD::ADD) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Base + offset. + if (isa(Ptr->getOperand(1))) { + int64_t Offset = cast(Ptr->getOperand(1))->getSExtValue(); + return BaseIndexOffset(Ptr->getOperand(0), SDValue(), Offset, + IsIndexSignExt); + } + + // Look at Base + Index + Offset cases. + SDValue Base = Ptr->getOperand(0); + SDValue IndexOffset = Ptr->getOperand(1); + + // Skip signextends. + if (IndexOffset->getOpcode() == ISD::SIGN_EXTEND) { + IndexOffset = IndexOffset->getOperand(0); + IsIndexSignExt = true; + } + + // Either the case of Base + Index (no offset) or something else. + if (IndexOffset->getOpcode() != ISD::ADD) + return BaseIndexOffset(Base, IndexOffset, 0, IsIndexSignExt); + + // Now we have the case of Base + Index + offset. + SDValue Index = IndexOffset->getOperand(0); + SDValue Offset = IndexOffset->getOperand(1); + + if (!isa(Offset)) + return BaseIndexOffset(Ptr, SDValue(), 0, IsIndexSignExt); + + // Ignore signextends. + if (Index->getOpcode() == ISD::SIGN_EXTEND) { + Index = Index->getOperand(0); + IsIndexSignExt = true; + } else IsIndexSignExt = false; + + int64_t Off = cast(Offset)->getSExtValue(); + return BaseIndexOffset(Base, Index, Off, IsIndexSignExt); + } +}; /// Holds a pointer to an LSBaseSDNode as well as information on where it /// is located in a sequence of memory operations connected by a chain. @@ -7522,6 +7813,8 @@ struct ConsecutiveMemoryChainSorter { bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT MemVT = St->getMemoryVT(); int64_t ElementSizeBytes = MemVT.getSizeInBits()/8; + bool NoVectors = DAG.getMachineFunction().getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::NoImplicitFloat); // Don't merge vectors into wider inputs. if (MemVT.isVector() || !MemVT.isSimple()) @@ -7540,19 +7833,26 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Chain->hasOneUse() && Chain->use_begin()->getOpcode() == ISD::STORE) return false; - // This holds the base pointer and the offset in bytes from the base pointer. - std::pair BasePtr = - GetPointerBaseAndOffset(St->getBasePtr()); + // This holds the base pointer, index, and the offset in bytes from the base + // pointer. + BaseIndexOffset BasePtr = BaseIndexOffset::match(St->getBasePtr()); // We must have a base and an offset. - if (!BasePtr.first.getNode()) + if (!BasePtr.Base.getNode()) return false; // Do not handle stores to undef base pointers. 
- if (BasePtr.first.getOpcode() == ISD::UNDEF) + if (BasePtr.Base.getOpcode() == ISD::UNDEF) return false; + // Save the LoadSDNodes that we find in the chain. + // We need to make sure that these nodes do not interfere with + // any of the store nodes. + SmallVector AliasLoadNodes; + + // Save the StoreSDNodes that we find in the chain. SmallVector StoreNodes; + // Walk up the chain and look for nodes with offsets from the same // base pointer. Stop when reaching an instruction with a different kind // or instruction which has a different base pointer. @@ -7564,11 +7864,10 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // Find the base pointer and offset for this memory node. - std::pair Ptr = - GetPointerBaseAndOffset(Index->getBasePtr()); + BaseIndexOffset Ptr = BaseIndexOffset::match(Index->getBasePtr()); // Check that the base pointer is the same as the original one. - if (Ptr.first.getNode() != BasePtr.first.getNode()) + if (!Ptr.equalBaseIndex(BasePtr)) break; // Check that the alignment is the same. @@ -7594,10 +7893,28 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { break; // We found a potential memory operand to merge. - StoreNodes.push_back(MemOpLink(Index, Ptr.second, Seq++)); - - // Move up the chain to the next memory operation. - Index = dyn_cast(Index->getChain().getNode()); + StoreNodes.push_back(MemOpLink(Index, Ptr.Offset, Seq++)); + + // Find the next memory operand in the chain. If the next operand in the + // chain is a store then move up and continue the scan with the next + // memory operand. If the next operand is a load save it and use alias + // information to check if it interferes with anything. + SDNode *NextInChain = Index->getChain().getNode(); + while (1) { + if (StoreSDNode *STn = dyn_cast(NextInChain)) { + // We found a store node. Use it for the next iteration. + Index = STn; + break; + } else if (LoadSDNode *Ldn = dyn_cast(NextInChain)) { + // Save the load node for later. Continue the scan. + AliasLoadNodes.push_back(Ldn); + NextInChain = Ldn->getChain().getNode(); + continue; + } else { + Index = NULL; + break; + } + } } // Check if there is anything to merge. @@ -7612,9 +7929,25 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // store memory address. unsigned LastConsecutiveStore = 0; int64_t StartAddress = StoreNodes[0].OffsetFromBase; - for (unsigned i=1; i 0) { + int64_t CurrAddress = StoreNodes[i].OffsetFromBase; + if (CurrAddress - StartAddress != (ElementSizeBytes * i)) + break; + } + + bool Alias = false; + // Check if this store interferes with any of the loads that we found. + for (unsigned ld = 0, lde = AliasLoadNodes.size(); ld < lde; ++ld) + if (isAlias(AliasLoadNodes[ld], StoreNodes[i].MemNode)) { + Alias = true; + break; + } + // We found a load that alias with this store. Stop the sequence. + if (Alias) break; // Mark this node as useful. @@ -7647,6 +7980,14 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { EVT StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); if (TLI.isTypeLegal(StoreTy)) LastLegalType = i+1; + // Or check whether a truncstore is legal. + else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoredVal.getValueType()); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy)) + LastLegalType = i+1; + } // Find a legal type for the vector store. 
EVT Ty = EVT::getVectorVT(*DAG.getContext(), MemVT, i+1); @@ -7654,15 +7995,16 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { LastLegalVectorType = i + 1; } - // We only use vectors if the constant is known to be zero. - if (NonZero) + // We only use vectors if the constant is known to be zero and the + // function is not marked with the noimplicitfloat attribute. + if (NonZero || NoVectors) LastLegalVectorType = 0; // Check if we found a legal integer type to store. if (LastLegalType == 0 && LastLegalVectorType == 0) return false; - bool UseVector = LastLegalVectorType > LastLegalType; + bool UseVector = (LastLegalVectorType > LastLegalType) && !NoVectors; unsigned NumElem = UseVector ? LastLegalVectorType : LastLegalType; // Make sure we have something to merge. @@ -7756,7 +8098,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // Find acceptable loads. Loads need to have the same chain (token factor), // must not be zext, volatile, indexed, and they must be consecutive. - SDValue LdBasePtr; + BaseIndexOffset LdBasePtr; for (unsigned i=0; i(StoreNodes[i].MemNode); LoadSDNode *Ld = dyn_cast(St->getValue()); @@ -7782,21 +8124,19 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { if (Ld->getMemoryVT() != MemVT) break; - std::pair LdPtr = - GetPointerBaseAndOffset(Ld->getBasePtr()); - + BaseIndexOffset LdPtr = BaseIndexOffset::match(Ld->getBasePtr()); // If this is not the first ptr that we check. - if (LdBasePtr.getNode()) { + if (LdBasePtr.Base.getNode()) { // The base ptr must be the same. - if (LdPtr.first != LdBasePtr) + if (!LdPtr.equalBaseIndex(LdBasePtr)) break; } else { // Check that all other base pointers are the same as this one. - LdBasePtr = LdPtr.first; + LdBasePtr = LdPtr; } // We found a potential memory operand to merge. - LoadNodes.push_back(MemOpLink(Ld, LdPtr.second, 0)); + LoadNodes.push_back(MemOpLink(Ld, LdPtr.Offset, 0)); } if (LoadNodes.size() < 2) @@ -7815,7 +8155,7 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { // All loads much share the same chain. if (LoadNodes[i].MemNode->getChain() != FirstChain) break; - + int64_t CurrAddress = LoadNodes[i].OffsetFromBase; if (CurrAddress - StartAddress != (ElementSizeBytes * i)) break; @@ -7831,11 +8171,22 @@ bool DAGCombiner::MergeConsecutiveStores(StoreSDNode* St) { StoreTy = EVT::getIntegerVT(*DAG.getContext(), StoreBW); if (TLI.isTypeLegal(StoreTy)) LastLegalIntegerType = i + 1; + // Or check whether a truncstore and extload is legal. + else if (TLI.getTypeAction(*DAG.getContext(), StoreTy) == + TargetLowering::TypePromoteInteger) { + EVT LegalizedStoredValueTy = + TLI.getTypeToTransformTo(*DAG.getContext(), StoreTy); + if (TLI.isTruncStoreLegal(LegalizedStoredValueTy, StoreTy) && + TLI.isLoadExtLegal(ISD::ZEXTLOAD, StoreTy) && + TLI.isLoadExtLegal(ISD::SEXTLOAD, StoreTy) && + TLI.isLoadExtLegal(ISD::EXTLOAD, StoreTy)) + LastLegalIntegerType = i+1; + } } // Only use vector types if the vector type is larger than the integer type. // If they are the same, use integers. 
- bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType; + bool UseVectorTy = LastLegalVectorType > LastLegalIntegerType && !NoVectors; unsigned LastLegalType = std::max(LastLegalVectorType, LastLegalIntegerType); // We add +1 here because the LastXXX variables refer to location while @@ -8116,8 +8467,21 @@ SDValue DAGCombiner::visitSTORE(SDNode *N) { // Only perform this optimization before the types are legal, because we // don't want to perform this optimization on every DAGCombine invocation. - if (!LegalTypes && MergeConsecutiveStores(ST)) - return SDValue(N, 0); + if (!LegalTypes) { + bool EverChanged = false; + + do { + // There can be multiple store sequences on the same chain. + // Keep trying to merge store sequences until we are unable to do so + // or until we merge the last store on the chain. + bool Changed = MergeConsecutiveStores(ST); + EverChanged |= Changed; + if (!Changed) break; + } while (ST->getOpcode() != ISD::DELETED_NODE); + + if (EverChanged) + return SDValue(N, 0); + } return ReduceLoadOpStoreWidth(N); } @@ -8514,11 +8878,8 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { if (Opcode == ISD::DELETED_NODE && (Opc == ISD::UINT_TO_FP || Opc == ISD::SINT_TO_FP)) { Opcode = Opc; - // If not supported by target, bail out. - if (TLI.getOperationAction(Opcode, VT) != TargetLowering::Legal && - TLI.getOperationAction(Opcode, VT) != TargetLowering::Custom) - return SDValue(); } + if (Opc != Opcode) return SDValue(); @@ -8543,6 +8904,10 @@ SDValue DAGCombiner::reduceBuildVecConvertToConvertBuildVec(SDNode *N) { assert(SrcVT != MVT::Other && "Cannot determine source type!"); EVT NVT = EVT::getVectorVT(*DAG.getContext(), SrcVT, NumInScalars); + + if (!TLI.isOperationLegalOrCustom(Opcode, NVT)) + return SDValue(); + SmallVector Opnds; for (unsigned i = 0; i != NumInScalars; ++i) { SDValue In = N->getOperand(i); @@ -8707,12 +9072,32 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { EVT NVT = N->getValueType(0); SDValue V = N->getOperand(0); + if (V->getOpcode() == ISD::CONCAT_VECTORS) { + // Combine: + // (extract_subvec (concat V1, V2, ...), i) + // Into: + // Vi if possible + // Only operand 0 is checked as 'concat' assumes all inputs of the same type. + if (V->getOperand(0).getValueType() != NVT) + return SDValue(); + unsigned Idx = dyn_cast(N->getOperand(1))->getZExtValue(); + unsigned NumElems = NVT.getVectorNumElements(); + assert((Idx % NumElems) == 0 && + "IDX in concat is not a multiple of the result vector length."); + return V->getOperand(Idx / NumElems); + } + + // Skip bitcasting + if (V->getOpcode() == ISD::BITCAST) + V = V.getOperand(0); + if (V->getOpcode() == ISD::INSERT_SUBVECTOR) { + DebugLoc dl = N->getDebugLoc(); // Handle only simple case where vector being inserted and vector // being extracted are of same type, and are half size of larger vectors. EVT BigVT = V->getOperand(0).getValueType(); EVT SmallVT = V->getOperand(1).getValueType(); - if (NVT != SmallVT || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) + if (!NVT.bitsEq(SmallVT) || NVT.getSizeInBits()*2 != BigVT.getSizeInBits()) return SDValue(); // Only handle cases where both indexes are constants with the same type. 
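// A minimal standalone sketch (not part of the patch) of the index arithmetic
// behind the (extract_subvec (concat V1, V2, ...), Idx) fold shown above: when
// every concat input already has the result vector type, the extracted
// subvector is simply operand Idx / NumElems. The name pickConcatOperand and
// the plain std::vector standing in for the operand list are assumptions made
// only for illustration.
#include <cassert>
#include <vector>

static int pickConcatOperand(const std::vector<int> &ConcatOps,
                             unsigned Idx, unsigned NumElems) {
  // Mirrors the combine's assertion: the extract index must be aligned to the
  // result vector length, otherwise the fold does not apply.
  assert(NumElems != 0 && (Idx % NumElems) == 0 &&
         "IDX in concat is not a multiple of the result vector length");
  return ConcatOps[Idx / NumElems]; // same division the DAG combine performs
}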
@@ -8725,30 +9110,18 @@ SDValue DAGCombiner::visitEXTRACT_SUBVECTOR(SDNode* N) { // Combine: // (extract_subvec (insert_subvec V1, V2, InsIdx), ExtIdx) // Into: - // indices are equal => V1 + // indices are equal or bit offsets are equal => V1 // otherwise => (extract_subvec V1, ExtIdx) - if (InsIdx->getZExtValue() == ExtIdx->getZExtValue()) - return V->getOperand(1); - return DAG.getNode(ISD::EXTRACT_SUBVECTOR, N->getDebugLoc(), NVT, - V->getOperand(0), N->getOperand(1)); + if (InsIdx->getZExtValue() * SmallVT.getScalarType().getSizeInBits() == + ExtIdx->getZExtValue() * NVT.getScalarType().getSizeInBits()) + return DAG.getNode(ISD::BITCAST, dl, NVT, V->getOperand(1)); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, NVT, + DAG.getNode(ISD::BITCAST, dl, + N->getOperand(0).getValueType(), + V->getOperand(0)), N->getOperand(1)); } } - if (V->getOpcode() == ISD::CONCAT_VECTORS) { - // Combine: - // (extract_subvec (concat V1, V2, ...), i) - // Into: - // Vi if possible - // Only operand 0 is checked as 'concat' assumes all inputs of the same type. - if (V->getOperand(0).getValueType() != NVT) - return SDValue(); - unsigned Idx = dyn_cast(N->getOperand(1))->getZExtValue(); - unsigned NumElems = NVT.getVectorNumElements(); - assert((Idx % NumElems) == 0 && - "IDX in concat is not a multiple of the result vector length."); - return V->getOperand(Idx / NumElems); - } - return SDValue(); } @@ -8992,11 +9365,6 @@ SDValue DAGCombiner::XformToShuffleWithZero(SDNode *N) { /// SimplifyVBinOp - Visit a binary vector operation, like ADD. SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { - // After legalize, the target may be depending on adds and other - // binary ops to provide legal ways to construct constants or other - // things. Simplifying them may result in a loss of legality. - if (LegalOperations) return SDValue(); - assert(N->getValueType(0).isVector() && "SimplifyVBinOp only works on vectors!"); @@ -9066,11 +9434,6 @@ SDValue DAGCombiner::SimplifyVBinOp(SDNode *N) { /// SimplifyVUnaryOp - Visit a binary vector operation, like FABS/FNEG. SDValue DAGCombiner::SimplifyVUnaryOp(SDNode *N) { - // After legalize, the target may be depending on adds and other - // binary ops to provide legal ways to construct constants or other - // things. Simplifying them may result in a loss of legality. - if (LegalOperations) return SDValue(); - assert(N->getValueType(0).isVector() && "SimplifyVUnaryOp only works on vectors!"); @@ -9173,7 +9536,9 @@ bool DAGCombiner::SimplifySelectOps(SDNode *TheSelect, SDValue LHS, // src value info, don't do the transformation if the memory // locations are not in the default address space. LLD->getPointerInfo().getAddrSpace() != 0 || - RLD->getPointerInfo().getAddrSpace() != 0) + RLD->getPointerInfo().getAddrSpace() != 0 || + !TLI.isOperationLegalOrCustom(TheSelect->getOpcode(), + LLD->getBasePtr().getValueType())) return false; // Check that the select condition doesn't reach either load. 
If so, @@ -9537,7 +9902,7 @@ SDValue DAGCombiner::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, ISD::CondCode Cond, DebugLoc DL, bool foldBooleans) { TargetLowering::DAGCombinerInfo - DagCombineInfo(DAG, !LegalTypes, !LegalOperations, false, this); + DagCombineInfo(DAG, Level, false, this); return TLI.SimplifySetCC(VT, N0, N1, Cond, foldBooleans, DagCombineInfo, DL); } @@ -9680,6 +10045,23 @@ bool DAGCombiner::isAlias(SDValue Ptr1, int64_t Size1, return true; } +bool DAGCombiner::isAlias(LSBaseSDNode *Op0, LSBaseSDNode *Op1) { + SDValue Ptr0, Ptr1; + int64_t Size0, Size1; + const Value *SrcValue0, *SrcValue1; + int SrcValueOffset0, SrcValueOffset1; + unsigned SrcValueAlign0, SrcValueAlign1; + const MDNode *SrcTBAAInfo0, *SrcTBAAInfo1; + FindAliasInfo(Op0, Ptr0, Size0, SrcValue0, SrcValueOffset0, + SrcValueAlign0, SrcTBAAInfo0); + FindAliasInfo(Op1, Ptr1, Size1, SrcValue1, SrcValueOffset1, + SrcValueAlign1, SrcTBAAInfo1); + return isAlias(Ptr0, Size0, SrcValue0, SrcValueOffset0, + SrcValueAlign0, SrcTBAAInfo0, + Ptr1, Size1, SrcValue1, SrcValueOffset1, + SrcValueAlign1, SrcTBAAInfo1); +} + /// FindAliasInfo - Extracts the relevant alias information from the memory /// node. Returns true if the operand was a load. bool DAGCombiner::FindAliasInfo(SDNode *N, diff --git a/lib/CodeGen/SelectionDAG/FastISel.cpp b/lib/CodeGen/SelectionDAG/FastISel.cpp index 4854cf7b261f..9ac738e50726 100644 --- a/lib/CodeGen/SelectionDAG/FastISel.cpp +++ b/lib/CodeGen/SelectionDAG/FastISel.cpp @@ -40,27 +40,27 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Operator.h" -#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Loads.h" +#include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/Loads.h" -#include "llvm/DataLayout.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Operator.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Debug.h" -#include "llvm/ADT/Statistic.h" using namespace llvm; STATISTIC(NumFastIselSuccessIndependent, "Number of insts selected by " @@ -87,6 +87,27 @@ void FastISel::startNewBlock() { LastLocalValue = EmitStartPt; } +bool FastISel::LowerArguments() { + if (!FuncInfo.CanLowerReturn) + // Fallback to SDISel argument lowering code to deal with sret pointer + // parameter. + return false; + + if (!FastLowerArguments()) + return false; + + // Enter non-dead arguments into ValueMap for uses in non-entry BBs. 
+ for (Function::const_arg_iterator I = FuncInfo.Fn->arg_begin(), + E = FuncInfo.Fn->arg_end(); I != E; ++I) { + if (!I->use_empty()) { + DenseMap::iterator VI = LocalValueMap.find(I); + assert(VI != LocalValueMap.end() && "Missed an argument?"); + FuncInfo.ValueMap[I] = VI->second; + } + } + return true; +} + void FastISel::flushLocalValueMap() { LocalValueMap.clear(); LastLocalValue = EmitStartPt; @@ -675,6 +696,13 @@ bool FastISel::SelectCall(const User *I) { UpdateValueMap(Call, ResultReg); return true; } + case Intrinsic::expect: { + unsigned ResultReg = getRegForValue(Call->getArgOperand(0)); + if (ResultReg == 0) + return false; + UpdateValueMap(Call, ResultReg); + return true; + } } // Usually, it does not make sense to initialize a value, @@ -684,7 +712,7 @@ bool FastISel::SelectCall(const User *I) { // all the values which have already been materialized, // appear after the call. It also makes sense to skip intrinsics // since they tend to be inlined. - if (!isa(F)) + if (!isa(Call)) flushLocalValueMap(); // An arbitrary call. Bail. @@ -737,15 +765,15 @@ bool FastISel::SelectBitCast(const User *I) { } // Bitcasts of other values become reg-reg copies or BITCAST operators. - EVT SrcVT = TLI.getValueType(I->getOperand(0)->getType()); - EVT DstVT = TLI.getValueType(I->getType()); - - if (SrcVT == MVT::Other || !SrcVT.isSimple() || - DstVT == MVT::Other || !DstVT.isSimple() || - !TLI.isTypeLegal(SrcVT) || !TLI.isTypeLegal(DstVT)) + EVT SrcEVT = TLI.getValueType(I->getOperand(0)->getType()); + EVT DstEVT = TLI.getValueType(I->getType()); + if (SrcEVT == MVT::Other || DstEVT == MVT::Other || + !TLI.isTypeLegal(SrcEVT) || !TLI.isTypeLegal(DstEVT)) // Unhandled type. Halt "fast" selection and bail. return false; + MVT SrcVT = SrcEVT.getSimpleVT(); + MVT DstVT = DstEVT.getSimpleVT(); unsigned Op0 = getRegForValue(I->getOperand(0)); if (Op0 == 0) // Unhandled operand. Halt "fast" selection and bail. @@ -755,7 +783,7 @@ bool FastISel::SelectBitCast(const User *I) { // First, try to perform the bitcast by inserting a reg-reg copy. unsigned ResultReg = 0; - if (SrcVT.getSimpleVT() == DstVT.getSimpleVT()) { + if (SrcVT == DstVT) { const TargetRegisterClass* SrcClass = TLI.getRegClassFor(SrcVT); const TargetRegisterClass* DstClass = TLI.getRegClassFor(DstVT); // Don't attempt a cross-class copy. It will likely fail. @@ -768,8 +796,7 @@ bool FastISel::SelectBitCast(const User *I) { // If the reg-reg copy failed, select a BITCAST opcode. if (!ResultReg) - ResultReg = FastEmit_r(SrcVT.getSimpleVT(), DstVT.getSimpleVT(), - ISD::BITCAST, Op0, Op0IsKill); + ResultReg = FastEmit_r(SrcVT, DstVT, ISD::BITCAST, Op0, Op0IsKill); if (!ResultReg) return false; @@ -837,7 +864,8 @@ FastISel::SelectInstruction(const Instruction *I) { void FastISel::FastEmitBranch(MachineBasicBlock *MSucc, DebugLoc DL) { - if (FuncInfo.MBB->getBasicBlock()->size() > 1 && FuncInfo.MBB->isLayoutSuccessor(MSucc)) { + if (FuncInfo.MBB->getBasicBlock()->size() > 1 && + FuncInfo.MBB->isLayoutSuccessor(MSucc)) { // For more accurate line information if this is the only instruction // in the block then emit it, otherwise we have the unconditional // fall-through case, which needs no instructions. 
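// Simplified standalone sketch (not LLVM code) of the map hand-off pattern the
// new FastISel::LowerArguments relies on above: registers materialized into a
// block-local map are copied into the function-wide map for every argument
// that has uses, so later blocks can find them. ValueId, Reg and
// promoteUsedArgs are invented stand-ins for the real Value*/register types.
#include <cstddef>
#include <map>
#include <vector>

typedef const void *ValueId; // stands in for const llvm::Value *
typedef unsigned Reg;        // stands in for a virtual register number

static void promoteUsedArgs(const std::map<ValueId, Reg> &LocalVals,
                            const std::vector<ValueId> &UsedArgs,
                            std::map<ValueId, Reg> &FuncVals) {
  for (std::size_t i = 0, e = UsedArgs.size(); i != e; ++i) {
    // Same lookup-then-copy the argument loop performs with LocalValueMap and
    // FuncInfo.ValueMap; a missing entry would indicate a lowering bug.
    std::map<ValueId, Reg>::const_iterator It = LocalVals.find(UsedArgs[i]);
    if (It != LocalVals.end())
      FuncVals[UsedArgs[i]] = It->second;
  }
}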
@@ -1068,6 +1096,10 @@ FastISel::FastISel(FunctionLoweringInfo &funcInfo, FastISel::~FastISel() {} +bool FastISel::FastLowerArguments() { + return false; +} + unsigned FastISel::FastEmit_(MVT, MVT, unsigned) { return 0; @@ -1151,6 +1183,8 @@ unsigned FastISel::FastEmit_ri_(MVT VT, unsigned Opcode, IntegerType *ITy = IntegerType::get(FuncInfo.Fn->getContext(), VT.getSizeInBits()); MaterialReg = getRegForValue(ConstantInt::get(ITy, Imm)); + assert (MaterialReg != 0 && "Unable to materialize imm."); + if (MaterialReg == 0) return 0; } return FastEmit_rr(VT, VT, Opcode, Op0, Op0IsKill, diff --git a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp index a4182906cbf4..b46edad7a3d4 100644 --- a/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp +++ b/lib/CodeGen/SelectionDAG/FunctionLoweringInfo.cpp @@ -13,29 +13,29 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "function-lowering-info" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/ADT/PostOrderIterator.h" #include "llvm/CodeGen/Analysis.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -66,8 +66,7 @@ void FunctionLoweringInfo::set(const Function &fn, MachineFunction &mf) { // Check whether the function can return without sret-demotion. SmallVector Outs; - GetReturnInfo(Fn->getReturnType(), - Fn->getAttributes().getRetAttributes(), Outs, TLI); + GetReturnInfo(Fn->getReturnType(), Fn->getAttributes(), Outs, TLI); CanLowerReturn = TLI.CanLowerReturn(Fn->getCallingConv(), *MF, Fn->isVarArg(), Outs, Fn->getContext()); @@ -208,7 +207,7 @@ void FunctionLoweringInfo::clear() { } /// CreateReg - Allocate a single virtual register for the given type. 
-unsigned FunctionLoweringInfo::CreateReg(EVT VT) { +unsigned FunctionLoweringInfo::CreateReg(MVT VT) { return RegInfo->createVirtualRegister(TLI.getRegClassFor(VT)); } @@ -226,7 +225,7 @@ unsigned FunctionLoweringInfo::CreateRegs(Type *Ty) { unsigned FirstReg = 0; for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; - EVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); + MVT RegisterVT = TLI.getRegisterType(Ty->getContext(), ValueVT); unsigned NumRegs = TLI.getNumRegisters(Ty->getContext(), ValueVT); for (unsigned i = 0; i != NumRegs; ++i) { diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index a8381b25ba12..3b1abd7c836e 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -16,18 +16,18 @@ #define DEBUG_TYPE "instr-emitter" #include "InstrEmitter.h" #include "SDNodeDbgValue.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; /// MinRCSize - Smallest register class we allow when constraining virtual @@ -99,7 +99,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, // the CopyToReg'd destination register instead of creating a new vreg. bool MatchReg = true; const TargetRegisterClass *UseRC = NULL; - EVT VT = Node->getValueType(ResNo); + MVT VT = Node->getSimpleValueType(ResNo); // Stick to the preferred register classes for legal types. 
if (TLI->isTypeLegal(VT)) @@ -124,7 +124,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, SDValue Op = User->getOperand(i); if (Op.getNode() != Node || Op.getResNo() != ResNo) continue; - EVT VT = Node->getValueType(Op.getResNo()); + MVT VT = Node->getSimpleValueType(Op.getResNo()); if (VT == MVT::Other || VT == MVT::Glue) continue; Match = false; @@ -203,7 +203,8 @@ unsigned InstrEmitter::getDstOfOnlyCopyToRegUse(SDNode *Node, return 0; } -void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, +void InstrEmitter::CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap &VRBaseMap) { @@ -222,7 +223,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, unsigned NumResults = CountResults(Node); VRBase = cast(Node->getOperand(i-NumResults))->getReg(); assert(TargetRegisterInfo::isPhysicalRegister(VRBase)); - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + MIB.addReg(VRBase, RegState::Define); } if (!VRBase && !IsClone && !IsCloned) @@ -237,7 +238,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, const TargetRegisterClass *RegRC = MRI->getRegClass(Reg); if (RegRC == RC) { VRBase = Reg; - MI->addOperand(MachineOperand::CreateReg(Reg, true)); + MIB.addReg(VRBase, RegState::Define); break; } } @@ -249,7 +250,7 @@ void InstrEmitter::CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, if (VRBase == 0) { assert(RC && "Isn't a register operand!"); VRBase = MRI->createVirtualRegister(RC); - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + MIB.addReg(VRBase, RegState::Define); } SDValue Op(Node, i); @@ -272,7 +273,8 @@ unsigned InstrEmitter::getVR(SDValue Op, // IMPLICIT_DEF can produce any type of result so its MCInstrDesc // does not include operand register class info. if (!VReg) { - const TargetRegisterClass *RC = TLI->getRegClassFor(Op.getValueType()); + const TargetRegisterClass *RC = + TLI->getRegClassFor(Op.getSimpleValueType()); VReg = MRI->createVirtualRegister(RC); } BuildMI(*MBB, InsertPos, Op.getDebugLoc(), @@ -290,7 +292,8 @@ unsigned InstrEmitter::getVR(SDValue Op, /// specified machine instr. Insert register copies if the register is /// not in the required register class. 
void -InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, +InstrEmitter::AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap &VRBaseMap, @@ -302,7 +305,7 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, unsigned VReg = getVR(Op, VRBaseMap); assert(TargetRegisterInfo::isVirtualRegister(VReg) && "Not a vreg?"); - const MCInstrDesc &MCID = MI->getDesc(); + const MCInstrDesc &MCID = MIB->getDesc(); bool isOptDef = IIOpNum < MCID.getNumOperands() && MCID.OpInfo[IIOpNum].isOptionalDef(); @@ -334,56 +337,53 @@ InstrEmitter::AddRegisterOperand(MachineInstr *MI, SDValue Op, !IsDebug && !(IsClone || IsCloned); if (isKill) { - unsigned Idx = MI->getNumOperands(); + unsigned Idx = MIB->getNumOperands(); while (Idx > 0 && - MI->getOperand(Idx-1).isReg() && MI->getOperand(Idx-1).isImplicit()) + MIB->getOperand(Idx-1).isReg() && + MIB->getOperand(Idx-1).isImplicit()) --Idx; - bool isTied = MI->getDesc().getOperandConstraint(Idx, MCOI::TIED_TO) != -1; + bool isTied = MCID.getOperandConstraint(Idx, MCOI::TIED_TO) != -1; if (isTied) isKill = false; } - MI->addOperand(MachineOperand::CreateReg(VReg, isOptDef, - false/*isImp*/, isKill, - false/*isDead*/, false/*isUndef*/, - false/*isEarlyClobber*/, - 0/*SubReg*/, IsDebug)); + MIB.addReg(VReg, getDefRegState(isOptDef) | getKillRegState(isKill) | + getDebugRegState(IsDebug)); } /// AddOperand - Add the specified operand to the specified machine instr. II /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. -void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, +void InstrEmitter::AddOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap &VRBaseMap, bool IsDebug, bool IsClone, bool IsCloned) { if (Op.isMachineOpcode()) { - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } else if (ConstantSDNode *C = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateImm(C->getSExtValue())); + MIB.addImm(C->getSExtValue()); } else if (ConstantFPSDNode *F = dyn_cast(Op)) { - const ConstantFP *CFP = F->getConstantFPValue(); - MI->addOperand(MachineOperand::CreateFPImm(CFP)); + MIB.addFPImm(F->getConstantFPValue()); } else if (RegisterSDNode *R = dyn_cast(Op)) { // Turn additional physreg operands into implicit uses on non-variadic // instructions. This is used by call and return instructions passing // arguments in registers. 
bool Imp = II && (IIOpNum >= II->getNumOperands() && !II->isVariadic()); - MI->addOperand(MachineOperand::CreateReg(R->getReg(), false, Imp)); + MIB.addReg(R->getReg(), getImplRegState(Imp)); } else if (RegisterMaskSDNode *RM = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateRegMask(RM->getRegMask())); + MIB.addRegMask(RM->getRegMask()); } else if (GlobalAddressSDNode *TGA = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateGA(TGA->getGlobal(), TGA->getOffset(), - TGA->getTargetFlags())); + MIB.addGlobalAddress(TGA->getGlobal(), TGA->getOffset(), + TGA->getTargetFlags()); } else if (BasicBlockSDNode *BBNode = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateMBB(BBNode->getBasicBlock())); + MIB.addMBB(BBNode->getBasicBlock()); } else if (FrameIndexSDNode *FI = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateFI(FI->getIndex())); + MIB.addFrameIndex(FI->getIndex()); } else if (JumpTableSDNode *JT = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateJTI(JT->getIndex(), - JT->getTargetFlags())); + MIB.addJumpTableIndex(JT->getIndex(), JT->getTargetFlags()); } else if (ConstantPoolSDNode *CP = dyn_cast(Op)) { int Offset = CP->getOffset(); unsigned Align = CP->getAlignment(); @@ -403,30 +403,26 @@ void InstrEmitter::AddOperand(MachineInstr *MI, SDValue Op, Idx = MCP->getConstantPoolIndex(CP->getMachineCPVal(), Align); else Idx = MCP->getConstantPoolIndex(CP->getConstVal(), Align); - MI->addOperand(MachineOperand::CreateCPI(Idx, Offset, - CP->getTargetFlags())); + MIB.addConstantPoolIndex(Idx, Offset, CP->getTargetFlags()); } else if (ExternalSymbolSDNode *ES = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateES(ES->getSymbol(), - ES->getTargetFlags())); + MIB.addExternalSymbol(ES->getSymbol(), ES->getTargetFlags()); } else if (BlockAddressSDNode *BA = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateBA(BA->getBlockAddress(), - BA->getOffset(), - BA->getTargetFlags())); + MIB.addBlockAddress(BA->getBlockAddress(), + BA->getOffset(), + BA->getTargetFlags()); } else if (TargetIndexSDNode *TI = dyn_cast(Op)) { - MI->addOperand(MachineOperand::CreateTargetIndex(TI->getIndex(), - TI->getOffset(), - TI->getTargetFlags())); + MIB.addTargetIndex(TI->getIndex(), TI->getOffset(), TI->getTargetFlags()); } else { assert(Op.getValueType() != MVT::Other && Op.getValueType() != MVT::Glue && "Chain and glue operands should occur at end of operand list!"); - AddRegisterOperand(MI, Op, IIOpNum, II, VRBaseMap, + AddRegisterOperand(MIB, Op, IIOpNum, II, VRBaseMap, IsDebug, IsClone, IsCloned); } } unsigned InstrEmitter::ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - EVT VT, DebugLoc DL) { + MVT VT, DebugLoc DL) { const TargetRegisterClass *VRC = MRI->getRegClass(VReg); const TargetRegisterClass *RC = TRI->getSubClassWithSubReg(VRC, SubIdx); @@ -477,7 +473,8 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // constraints on the %dst register, COPY can target all legal register // classes. unsigned SubIdx = cast(Node->getOperand(1))->getZExtValue(); - const TargetRegisterClass *TRC = TLI->getRegClassFor(Node->getValueType(0)); + const TargetRegisterClass *TRC = + TLI->getRegClassFor(Node->getSimpleValueType(0)); unsigned VReg = getVR(Node->getOperand(0), VRBaseMap); MachineInstr *DefMI = MRI->getVRegDef(VReg); @@ -500,7 +497,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // constrain its register class or issue a COPY to a compatible register // class. 
VReg = ConstrainForSubReg(VReg, SubIdx, - Node->getOperand(0).getValueType(), + Node->getOperand(0).getSimpleValueType(), Node->getDebugLoc()); // Create the destreg if it is missing. @@ -532,7 +529,7 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, // // There is no constraint on the %src register class. // - const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getValueType(0)); + const TargetRegisterClass *SRC = TLI->getRegClassFor(Node->getSimpleValueType(0)); SRC = TRI->getSubClassWithSubReg(SRC, SubIdx); assert(SRC && "No register class supports VT and SubIdx for INSERT_SUBREG"); @@ -540,22 +537,22 @@ void InstrEmitter::EmitSubregNode(SDNode *Node, VRBase = MRI->createVirtualRegister(SRC); // Create the insert_subreg or subreg_to_reg machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc)); - MI->addOperand(MachineOperand::CreateReg(VRBase, true)); + MachineInstrBuilder MIB = + BuildMI(*MF, Node->getDebugLoc(), TII->get(Opc), VRBase); // If creating a subreg_to_reg, then the first input operand // is an implicit value immediate, otherwise it's a register if (Opc == TargetOpcode::SUBREG_TO_REG) { const ConstantSDNode *SD = cast(N0); - MI->addOperand(MachineOperand::CreateImm(SD->getZExtValue())); + MIB.addImm(SD->getZExtValue()); } else - AddOperand(MI, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N0, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Add the subregster being inserted - AddOperand(MI, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, N1, 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); - MI->addOperand(MachineOperand::CreateImm(SubIdx)); - MBB->insert(InsertPos, MI); + MIB.addImm(SubIdx); + MBB->insert(InsertPos, MIB); } else llvm_unreachable("Node is not insert_subreg, extract_subreg, or subreg_to_reg"); @@ -596,12 +593,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, unsigned DstRCIdx = cast(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); unsigned NewVReg = MRI->createVirtualRegister(TRI->getAllocatableClass(RC)); - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::REG_SEQUENCE), NewVReg); + const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II, NewVReg); unsigned NumOps = Node->getNumOperands(); assert((NumOps & 1) == 1 && "REG_SEQUENCE must have an odd number of operands!"); - const MCInstrDesc &II = TII->get(TargetOpcode::REG_SEQUENCE); for (unsigned i = 1; i != NumOps; ++i) { SDValue Op = Node->getOperand(i); if ((i & 1) == 0) { @@ -620,11 +616,11 @@ void InstrEmitter::EmitRegSequence(SDNode *Node, } } } - AddOperand(MI, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, + AddOperand(MIB, Op, i+1, &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); } - MBB->insert(InsertPos, MI); + MBB->insert(InsertPos, MIB); SDValue Op(Node, 0); bool isNew = VRBaseMap.insert(std::make_pair(Op, NewVReg)).second; (void)isNew; // Silence compiler warning. 
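The InstrEmitter hunks above replace long positional-boolean MachineOperand::CreateReg calls with MachineInstrBuilder::addReg plus flag helpers such as getDefRegState and getKillRegState, so each boolean becomes a named bit that is OR-ed into one flags word. A minimal self-contained sketch of that idiom follows; the enum values and the toy builder below are illustrative stand-ins, not LLVM's actual RegState definitions.

    #include <cstdio>

    // Illustrative stand-ins for LLVM's RegState flags and helpers.
    enum RegFlag : unsigned {
      RegDefine = 1u << 0,
      RegKill   = 1u << 1,
      RegDebug  = 1u << 2,
    };

    constexpr unsigned getDefRegState(bool IsDef)     { return IsDef   ? RegDefine : 0; }
    constexpr unsigned getKillRegState(bool IsKill)   { return IsKill  ? RegKill   : 0; }
    constexpr unsigned getDebugRegState(bool IsDebug) { return IsDebug ? RegDebug  : 0; }

    // Toy builder: each addReg records a register plus OR-ed state flags,
    // mirroring MIB.addReg(VReg, getDefRegState(...) | getKillRegState(...)).
    struct ToyInstrBuilder {
      ToyInstrBuilder &addReg(unsigned Reg, unsigned Flags = 0) {
        std::printf("reg %u def=%d kill=%d debug=%d\n", Reg,
                    !!(Flags & RegDefine), !!(Flags & RegKill),
                    !!(Flags & RegDebug));
        return *this;
      }
    };

    int main() {
      ToyInstrBuilder MIB;
      bool isOptDef = false, isKill = true, IsDebug = false;
      MIB.addReg(5, getDefRegState(isOptDef) | getKillRegState(isKill) |
                    getDebugRegState(IsDebug));
    }

The design point is that one flags argument built from named helpers reads and reviews more easily than a row of anonymous true/false arguments, which is exactly the churn visible in the diff.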
@@ -661,7 +657,7 @@ InstrEmitter::EmitDbgValue(SDDbgValue *SD, if (I==VRBaseMap.end()) MIB.addReg(0U); // undef else - AddOperand(&*MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, + AddOperand(MIB, Op, (*MIB).getNumOperands(), &II, VRBaseMap, /*IsDebug=*/true, /*IsClone=*/false, /*IsCloned=*/false); } else if (SD->getKind() == SDDbgValue::CONST) { const Value *V = SD->getConst(); @@ -737,12 +733,12 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, #endif // Create the new machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), II); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), II); // Add result register values for things that are defined by this // instruction. if (NumResults) - CreateVirtualRegisters(Node, MI, II, IsClone, IsCloned, VRBaseMap); + CreateVirtualRegisters(Node, MIB, II, IsClone, IsCloned, VRBaseMap); // Emit all of the actual operands of this instruction, adding them to the // instruction as appropriate. @@ -751,17 +747,17 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, "Unable to cope with optional defs and phys regs defs!"); unsigned NumSkip = HasOptPRefs ? II.getNumDefs() - NumResults : 0; for (unsigned i = NumSkip; i != NodeOperands; ++i) - AddOperand(MI, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, + AddOperand(MIB, Node->getOperand(i), i-NumSkip+II.getNumDefs(), &II, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Transfer all of the memory reference descriptions of this instruction. - MI->setMemRefs(cast(Node)->memoperands_begin(), + MIB.setMemRefs(cast(Node)->memoperands_begin(), cast(Node)->memoperands_end()); // Insert the instruction into position in the block. This needs to // happen before any custom inserter hook is called so that the // hook knows where in the block to insert the replacement code. - MBB->insert(InsertPos, MI); + MBB->insert(InsertPos, MIB); // The MachineInstr may also define physregs instead of virtregs. These // physreg values can reach other instructions in different ways: @@ -819,13 +815,13 @@ EmitMachineNode(SDNode *Node, bool IsClone, bool IsCloned, // Finally mark unused registers as dead. if (!UsedRegs.empty() || II.getImplicitDefs()) - MI->setPhysRegsDeadExcept(UsedRegs, *TRI); + MIB->setPhysRegsDeadExcept(UsedRegs, *TRI); // Run post-isel target hook to adjust this instruction if needed. #ifdef NDEBUG if (II.hasPostISelHook()) #endif - TLI->AdjustInstrPostInstrSelection(MI, Node); + TLI->AdjustInstrPostInstrSelection(MIB, Node); } /// EmitSpecialNode - Generate machine code for a target-independent node and @@ -889,20 +885,20 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, --NumOps; // Ignore the glue operand. // Create the inline asm machine instruction. - MachineInstr *MI = BuildMI(*MF, Node->getDebugLoc(), - TII->get(TargetOpcode::INLINEASM)); + MachineInstrBuilder MIB = BuildMI(*MF, Node->getDebugLoc(), + TII->get(TargetOpcode::INLINEASM)); // Add the asm string as an external symbol operand. SDValue AsmStrV = Node->getOperand(InlineAsm::Op_AsmString); const char *AsmStr = cast(AsmStrV)->getSymbol(); - MI->addOperand(MachineOperand::CreateES(AsmStr)); + MIB.addExternalSymbol(AsmStr); // Add the HasSideEffect, isAlignStack, AsmDialect, MayLoad and MayStore // bits. int64_t ExtraInfo = cast(Node->getOperand(InlineAsm::Op_ExtraInfo))-> getZExtValue(); - MI->addOperand(MachineOperand::CreateImm(ExtraInfo)); + MIB.addImm(ExtraInfo); // Remember to operand index of the group flags. 
SmallVector GroupIdx; @@ -913,8 +909,8 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, cast(Node->getOperand(i))->getZExtValue(); const unsigned NumVals = InlineAsm::getNumOperandRegisters(Flags); - GroupIdx.push_back(MI->getNumOperands()); - MI->addOperand(MachineOperand::CreateImm(Flags)); + GroupIdx.push_back(MIB->getNumOperands()); + MIB.addImm(Flags); ++i; // Skip the ID value. switch (InlineAsm::getKind(Flags)) { @@ -925,20 +921,16 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // FIXME: Add dead flags for physical and virtual registers defined. // For now, mark physical register defs as implicit to help fast // regalloc. This makes inline asm look a lot like calls. - MI->addOperand(MachineOperand::CreateReg(Reg, true, - /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg))); + MIB.addReg(Reg, RegState::Define | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegDefEarlyClobber: case InlineAsm::Kind_Clobber: for (unsigned j = 0; j != NumVals; ++j, ++i) { unsigned Reg = cast(Node->getOperand(i))->getReg(); - MI->addOperand(MachineOperand::CreateReg(Reg, /*isDef=*/ true, - /*isImp=*/ TargetRegisterInfo::isPhysicalRegister(Reg), - /*isKill=*/ false, - /*isDead=*/ false, - /*isUndef=*/false, - /*isEarlyClobber=*/ true)); + MIB.addReg(Reg, RegState::Define | RegState::EarlyClobber | + getImplRegState(TargetRegisterInfo::isPhysicalRegister(Reg))); } break; case InlineAsm::Kind_RegUse: // Use of register. @@ -947,7 +939,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, // The addressing mode has been selected, just add all of the // operands to the machine instruction. for (unsigned j = 0; j != NumVals; ++j, ++i) - AddOperand(MI, Node->getOperand(i), 0, 0, VRBaseMap, + AddOperand(MIB, Node->getOperand(i), 0, 0, VRBaseMap, /*IsDebug=*/false, IsClone, IsCloned); // Manually set isTied bits. 
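In the inline-asm hunks above, every operand group is preceded by a packed Flags immediate; InlineAsm::getKind and InlineAsm::getNumOperandRegisters decode the group's kind and how many register operands follow, and GroupIdx records where each group starts so tied def/use operands can be wired up afterwards. A hedged sketch of decoding such a packed word follows; the bit layout is invented purely for illustration and is not LLVM's actual InlineAsm encoding.

    #include <cstdint>
    #include <cstdio>

    // Hypothetical packing: low 3 bits = group kind, remaining bits = count
    // of register operands that follow the flags word.
    enum Kind : unsigned { RegUse = 1, RegDef = 2, RegDefEarlyClobber = 3, Mem = 4 };

    constexpr uint32_t makeFlags(Kind K, unsigned NumRegs) {
      return (NumRegs << 3) | unsigned(K);
    }
    constexpr unsigned getKind(uint32_t Flags)         { return Flags & 0x7; }
    constexpr unsigned getNumRegisters(uint32_t Flags) { return Flags >> 3; }

    int main() {
      uint32_t Flags = makeFlags(RegDefEarlyClobber, 2);
      std::printf("kind=%u regs=%u\n", getKind(Flags), getNumRegisters(Flags));
    }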
@@ -957,7 +949,7 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, unsigned DefIdx = GroupIdx[DefGroup] + 1; unsigned UseIdx = GroupIdx.back() + 1; for (unsigned j = 0; j != NumVals; ++j) - MI->tieOperands(DefIdx + j, UseIdx + j); + MIB->tieOperands(DefIdx + j, UseIdx + j); } } break; @@ -968,9 +960,9 @@ EmitSpecialNode(SDNode *Node, bool IsClone, bool IsCloned, SDValue MDV = Node->getOperand(InlineAsm::Op_MDNode); const MDNode *MD = cast(MDV)->getMD(); if (MD) - MI->addOperand(MachineOperand::CreateMetadata(MD)); + MIB.addMetadata(MD); - MBB->insert(InsertPos, MI); + MBB->insert(InsertPos, MIB); break; } } diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.h b/lib/CodeGen/SelectionDAG/InstrEmitter.h index 9eddee9e33d3..a9c2203e8400 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.h +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.h @@ -16,12 +16,13 @@ #ifndef INSTREMITTER_H #define INSTREMITTER_H -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { +class MachineInstrBuilder; class MCInstrDesc; class SDDbgValue; @@ -48,7 +49,8 @@ class InstrEmitter { unsigned getDstOfOnlyCopyToRegUse(SDNode *Node, unsigned ResNo) const; - void CreateVirtualRegisters(SDNode *Node, MachineInstr *MI, + void CreateVirtualRegisters(SDNode *Node, + MachineInstrBuilder &MIB, const MCInstrDesc &II, bool IsClone, bool IsCloned, DenseMap &VRBaseMap); @@ -61,7 +63,8 @@ class InstrEmitter { /// AddRegisterOperand - Add the specified register as an operand to the /// specified machine instr. Insert register copies if the register is /// not in the required register class. - void AddRegisterOperand(MachineInstr *MI, SDValue Op, + void AddRegisterOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap &VRBaseMap, @@ -71,7 +74,8 @@ class InstrEmitter { /// specifies the instruction information for the node, and IIOpNum is the /// operand number (in the II) that we are adding. IIOpNum and II are used for /// assertions only. - void AddOperand(MachineInstr *MI, SDValue Op, + void AddOperand(MachineInstrBuilder &MIB, + SDValue Op, unsigned IIOpNum, const MCInstrDesc *II, DenseMap &VRBaseMap, @@ -81,7 +85,7 @@ class InstrEmitter { /// supports SubIdx sub-registers. Emit a copy if that isn't possible. /// Return the virtual register to use. unsigned ConstrainForSubReg(unsigned VReg, unsigned SubIdx, - EVT VT, DebugLoc DL); + MVT VT, DebugLoc DL); /// EmitSubregNode - Generate machine code for subreg nodes. 
/// diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index abf40b77a18f..51cc254b2c82 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -11,26 +11,27 @@ // //===----------------------------------------------------------------------===// -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/LLVMContext.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetFrameLowering.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/LLVMContext.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; //===----------------------------------------------------------------------===// @@ -101,6 +102,7 @@ private: SDNode *Node, bool isSigned); SDValue ExpandFPLibCall(SDNode *Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128); SDValue ExpandIntLibCall(SDNode *Node, bool isSigned, RTLIB::Libcall Call_I8, @@ -109,6 +111,7 @@ private: RTLIB::Libcall Call_I64, RTLIB::Libcall Call_I128); void ExpandDivRemLibCall(SDNode *Node, SmallVectorImpl &Results); + void ExpandSinCosLibCall(SDNode *Node, SmallVectorImpl &Results); SDValue EmitStackConvert(SDValue SrcOp, EVT SlotVT, EVT DestVT, DebugLoc dl); SDValue ExpandBUILD_VECTOR(SDNode *Node); @@ -321,7 +324,7 @@ static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, // Do a (aligned) store to a stack slot, then copy from the stack slot // to the final destination using (unaligned) integer loads and stores. EVT StoredVT = ST->getMemoryVT(); - EVT RegVT = + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), EVT::getIntegerVT(*DAG.getContext(), StoredVT.getSizeInBits())); @@ -447,7 +450,7 @@ ExpandUnalignedLoad(LoadSDNode *LD, SelectionDAG &DAG, // Copy the value to a (aligned) stack slot using (unaligned) integer // loads and stores, then do a (aligned) load from the stack slot. 
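The comment closing the hunk above describes the generic fallback for misaligned memory accesses: stage the value through a naturally aligned stack temporary, then perform an ordinary aligned access on that slot. A simplified plain-C++ analogue is sketched below, using memcpy for the piecewise copy where the legalizer emits register-sized loads and stores; the 64-bit width is chosen only for concreteness.

    #include <cstdint>
    #include <cstring>

    // Load a 64-bit value from a possibly misaligned address by staging it
    // through an aligned stack temporary, in the spirit of the
    // ExpandUnalignedLoad stack-slot fallback described above.
    uint64_t loadUnaligned64(const void *P) {
      uint64_t Slot;                       // naturally aligned stack slot
      std::memcpy(&Slot, P, sizeof Slot);  // byte-wise copy is alignment-safe
      return Slot;                         // ordinary aligned load of the slot
    }

Stores use the mirror-image sequence: an aligned store into the slot followed by a piecewise copy out to the unaligned destination.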
- EVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), intVT); unsigned LoadedBytes = LoadedVT.getSizeInBits() / 8; unsigned RegBytes = RegVT.getSizeInBits() / 8; unsigned NumRegs = (LoadedBytes + RegBytes - 1) / RegBytes; @@ -710,7 +713,7 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { { SDValue Value = ST->getValue(); - EVT VT = Value.getValueType(); + MVT VT = Value.getSimpleValueType(); switch (TLI.getOperationAction(ISD::STORE, VT)) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: @@ -731,9 +734,10 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { return; } case TargetLowering::Promote: { - assert(VT.isVector() && "Unknown legal promote case!"); - Value = DAG.getNode(ISD::BITCAST, dl, - TLI.getTypeToPromoteTo(ISD::STORE, VT), Value); + MVT NVT = TLI.getTypeToPromoteTo(ISD::STORE, VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote stores to same size type"); + Value = DAG.getNode(ISD::BITCAST, dl, NVT, Value); SDValue Result = DAG.getStore(Chain, dl, Value, Ptr, ST->getPointerInfo(), isVolatile, @@ -817,7 +821,8 @@ void SelectionDAGLegalize::LegalizeStoreOps(SDNode *Node) { SDValue Result = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, Lo, Hi); ReplaceNode(SDValue(Node, 0), Result); } else { - switch (TLI.getTruncStoreAction(ST->getValue().getValueType(), StVT)) { + switch (TLI.getTruncStoreAction(ST->getValue().getSimpleValueType(), + StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: // If this is an unaligned store and the target doesn't support it, @@ -862,7 +867,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { ISD::LoadExtType ExtType = LD->getExtensionType(); if (ExtType == ISD::NON_EXTLOAD) { - EVT VT = Node->getValueType(0); + MVT VT = Node->getSimpleValueType(0); SDValue RVal = SDValue(Node, 0); SDValue RChain = SDValue(Node, 1); @@ -889,10 +894,9 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { break; } case TargetLowering::Promote: { - // Only promote a load of vector type to another. - assert(VT.isVector() && "Cannot promote this load!"); - // Change base type to a different vector type. - EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), VT); + assert(NVT.getSizeInBits() == VT.getSizeInBits() && + "Can only promote loads to same size type"); SDValue Res = DAG.getLoad(NVT, dl, Chain, Ptr, LD->getPointerInfo(), LD->isVolatile(), LD->isNonTemporal(), @@ -1037,7 +1041,7 @@ void SelectionDAGLegalize::LegalizeLoadOps(SDNode *Node) { Chain = Ch; } else { bool isCustom = false; - switch (TLI.getLoadExtAction(ExtType, SrcVT)) { + switch (TLI.getLoadExtAction(ExtType, SrcVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Custom: isCustom = true; @@ -1184,7 +1188,7 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { unsigned CCOperand = Node->getOpcode() == ISD::SELECT_CC ? 4 : Node->getOpcode() == ISD::SETCC ? 2 : 1; unsigned CompareOperand = Node->getOpcode() == ISD::BR_CC ? 
2 : 0; - EVT OpVT = Node->getOperand(CompareOperand).getValueType(); + MVT OpVT = Node->getOperand(CompareOperand).getSimpleValueType(); ISD::CondCode CCCode = cast(Node->getOperand(CCOperand))->get(); Action = TLI.getCondCodeAction(CCCode, OpVT); @@ -1591,7 +1595,7 @@ void SelectionDAGLegalize::LegalizeSetCCCondCode(EVT VT, SDValue &LHS, SDValue &RHS, SDValue &CC, DebugLoc dl) { - EVT OpVT = LHS.getValueType(); + MVT OpVT = LHS.getSimpleValueType(); ISD::CondCode CCCode = cast(CC)->get(); switch (TLI.getCondCodeAction(CCCode, OpVT)) { default: llvm_unreachable("Unknown condition code action!"); @@ -1869,7 +1873,7 @@ SDValue SelectionDAGLegalize::ExpandLibCall(RTLIB::Libcall LC, SDNode *Node, // isTailCall may be true since the callee does not reference caller stack // frame. Check if it's in the right position. SDValue TCChain = InChain; - bool isTailCall = isInTailCallPosition(DAG, Node, TCChain, TLI); + bool isTailCall = TLI.isInTailCallPosition(DAG, Node, TCChain); if (isTailCall) InChain = TCChain; @@ -1956,6 +1960,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { RTLIB::Libcall LC; switch (Node->getValueType(0).getSimpleVT().SimpleTy) { @@ -1963,6 +1968,7 @@ SDValue SelectionDAGLegalize::ExpandFPLibCall(SDNode* Node, case MVT::f32: LC = Call_F32; break; case MVT::f64: LC = Call_F64; break; case MVT::f80: LC = Call_F80; break; + case MVT::f128: LC = Call_F128; break; case MVT::ppcf128: LC = Call_PPCF128; break; } return ExpandLibCall(LC, Node, false); @@ -2091,6 +2097,120 @@ SelectionDAGLegalize::ExpandDivRemLibCall(SDNode *Node, Results.push_back(Rem); } +/// isSinCosLibcallAvailable - Return true if sincos libcall is available. +static bool isSinCosLibcallAvailable(SDNode *Node, const TargetLowering &TLI) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + return TLI.getLibcallName(LC) != 0; +} + +/// canCombineSinCosLibcall - Return true if sincos libcall is available and +/// can be used to combine sin and cos. +static bool canCombineSinCosLibcall(SDNode *Node, const TargetLowering &TLI, + const TargetMachine &TM) { + if (!isSinCosLibcallAvailable(Node, TLI)) + return false; + // GNU sin/cos functions set errno while sincos does not. Therefore + // combining sin and cos is only safe if unsafe-fpmath is enabled. + bool isGNU = Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU; + if (isGNU && !TM.Options.UnsafeFPMath) + return false; + return true; +} + +/// useSinCos - Only issue sincos libcall if both sin and cos are +/// needed. +static bool useSinCos(SDNode *Node) { + unsigned OtherOpcode = Node->getOpcode() == ISD::FSIN + ? ISD::FCOS : ISD::FSIN; + + SDValue Op0 = Node->getOperand(0); + for (SDNode::use_iterator UI = Op0.getNode()->use_begin(), + UE = Op0.getNode()->use_end(); UI != UE; ++UI) { + SDNode *User = *UI; + if (User == Node) + continue; + // The other user might have been turned into sincos already. 
+ if (User->getOpcode() == OtherOpcode || User->getOpcode() == ISD::FSINCOS) + return true; + } + return false; +} + +/// ExpandSinCosLibCall - Issue libcalls to sincos to compute sin / cos +/// pairs. +void +SelectionDAGLegalize::ExpandSinCosLibCall(SDNode *Node, + SmallVectorImpl &Results) { + RTLIB::Libcall LC; + switch (Node->getValueType(0).getSimpleVT().SimpleTy) { + default: llvm_unreachable("Unexpected request for libcall!"); + case MVT::f32: LC = RTLIB::SINCOS_F32; break; + case MVT::f64: LC = RTLIB::SINCOS_F64; break; + case MVT::f80: LC = RTLIB::SINCOS_F80; break; + case MVT::f128: LC = RTLIB::SINCOS_F128; break; + case MVT::ppcf128: LC = RTLIB::SINCOS_PPCF128; break; + } + + // The input chain to this libcall is the entry node of the function. + // Legalizing the call will automatically add the previous call to the + // dependence. + SDValue InChain = DAG.getEntryNode(); + + EVT RetVT = Node->getValueType(0); + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + + TargetLowering::ArgListTy Args; + TargetLowering::ArgListEntry Entry; + + // Pass the argument. + Entry.Node = Node->getOperand(0); + Entry.Ty = RetTy; + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Pass the return address of sin. + SDValue SinPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = SinPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + // Also pass the return address of the cos. + SDValue CosPtr = DAG.CreateStackTemporary(RetVT); + Entry.Node = CosPtr; + Entry.Ty = RetTy->getPointerTo(); + Entry.isSExt = false; + Entry.isZExt = false; + Args.push_back(Entry); + + SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), + TLI.getPointerTy()); + + DebugLoc dl = Node->getDebugLoc(); + TargetLowering:: + CallLoweringInfo CLI(InChain, Type::getVoidTy(*DAG.getContext()), + false, false, false, false, + 0, TLI.getLibcallCallingConv(LC), /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair CallInfo = TLI.LowerCallTo(CLI); + + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, SinPtr, + MachinePointerInfo(), false, false, false, 0)); + Results.push_back(DAG.getLoad(RetVT, dl, CallInfo.second, CosPtr, + MachinePointerInfo(), false, false, false, 0)); +} + /// ExpandLegalINT_TO_FP - This function is responsible for legalizing a /// INT_TO_FP operation of the specified operand when the target requests that /// we expand it. At this point, we know that the result and operand types are @@ -2419,18 +2539,6 @@ SDValue SelectionDAGLegalize::ExpandBSWAP(SDValue Op, DebugLoc dl) { } } -/// SplatByte - Distribute ByteVal over NumBits bits. -// FIXME: Move this helper to a common place. -static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { - APInt Val = APInt(NumBits, ByteVal); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } - return Val; -} - /// ExpandBitCount - Expand the specified bitcount instruction into operations. 
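ExpandSinCosLibCall above lowers an FSINCOS node into a single libcall that returns both results through stack temporaries, and canCombineSinCosLibcall/useSinCos gate the transform: both sin(x) and cos(x) of the same operand must be live, and on GNU environments the combine only fires under unsafe-fp-math because sin/cos set errno while sincos does not. A hedged source-level sketch of the shape being optimized follows; a portable stand-in is used in place of the real sincos runtime entry point, whose availability depends on the target's libm.

    #include <cmath>
    #include <cstdio>

    // Before the combine: two separate libcalls on the same operand.
    void before(double X, double &S, double &C) {
      S = std::sin(X);
      C = std::cos(X);
    }

    // After the combine (conceptually): one sincos-style call that writes both
    // results through pointers, like the stack temporaries ExpandSinCosLibCall
    // passes to the runtime.  Stand-in only; not the real sincos symbol.
    static void sincosStandIn(double X, double *S, double *C) {
      *S = std::sin(X);
      *C = std::cos(X);
    }

    void after(double X, double &S, double &C) {
      sincosStandIn(X, &S, &C);
    }

    int main() {
      double S, C;
      after(0.5, S, C);
      std::printf("sin=%f cos=%f\n", S, C);
    }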
/// SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, @@ -2448,10 +2556,10 @@ SDValue SelectionDAGLegalize::ExpandBitCount(unsigned Opc, SDValue Op, // This is the "best" algorithm from // http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel - SDValue Mask55 = DAG.getConstant(SplatByte(Len, 0x55), VT); - SDValue Mask33 = DAG.getConstant(SplatByte(Len, 0x33), VT); - SDValue Mask0F = DAG.getConstant(SplatByte(Len, 0x0F), VT); - SDValue Mask01 = DAG.getConstant(SplatByte(Len, 0x01), VT); + SDValue Mask55 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x55)), VT); + SDValue Mask33 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x33)), VT); + SDValue Mask0F = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x0F)), VT); + SDValue Mask01 = DAG.getConstant(APInt::getSplat(Len, APInt(8, 0x01)), VT); // v = v - ((v >> 1) & 0x55555555...) Op = DAG.getNode(ISD::SUB, dl, VT, Op, @@ -2801,7 +2909,8 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { SDValue True, False; EVT VT = Node->getOperand(0).getValueType(); EVT NVT = Node->getValueType(0); - APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APFloat apf(DAG.EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); APInt x = APInt::getSignBit(NVT.getSizeInBits()); (void)apf.convertFromAPInt(x, false, APFloat::rmNearestTiesToEven); Tmp1 = DAG.getConstantFP(apf, VT); @@ -3032,77 +3141,114 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { } case ISD::FSQRT: Results.push_back(ExpandFPLibCall(Node, RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128)); + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128)); break; case ISD::FSIN: - Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_PPCF128)); + case ISD::FCOS: { + EVT VT = Node->getValueType(0); + bool isSIN = Node->getOpcode() == ISD::FSIN; + // Turn fsin / fcos into ISD::FSINCOS node if there are a pair of fsin / + // fcos which share the same operand and both are used. + if ((TLI.isOperationLegalOrCustom(ISD::FSINCOS, VT) || + canCombineSinCosLibcall(Node, TLI, TM)) + && useSinCos(Node)) { + SDVTList VTs = DAG.getVTList(VT, VT); + Tmp1 = DAG.getNode(ISD::FSINCOS, dl, VTs, Node->getOperand(0)); + if (!isSIN) + Tmp1 = Tmp1.getValue(1); + Results.push_back(Tmp1); + } else if (isSIN) { + Results.push_back(ExpandFPLibCall(Node, RTLIB::SIN_F32, RTLIB::SIN_F64, + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128)); + } else { + Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128)); + } break; - case ISD::FCOS: - Results.push_back(ExpandFPLibCall(Node, RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_PPCF128)); + } + case ISD::FSINCOS: + // Expand into sincos libcall. 
+ ExpandSinCosLibCall(Node, Results); break; case ISD::FLOG: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_PPCF128)); + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128)); break; case ISD::FLOG2: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128)); + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128)); break; case ISD::FLOG10: Results.push_back(ExpandFPLibCall(Node, RTLIB::LOG10_F32, RTLIB::LOG10_F64, - RTLIB::LOG10_F80, RTLIB::LOG10_PPCF128)); + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128)); break; case ISD::FEXP: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_PPCF128)); + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128)); break; case ISD::FEXP2: Results.push_back(ExpandFPLibCall(Node, RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128)); + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128)); break; case ISD::FTRUNC: Results.push_back(ExpandFPLibCall(Node, RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128)); + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128)); break; case ISD::FFLOOR: Results.push_back(ExpandFPLibCall(Node, RTLIB::FLOOR_F32, RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, RTLIB::FLOOR_PPCF128)); + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128)); break; case ISD::FCEIL: Results.push_back(ExpandFPLibCall(Node, RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128)); + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128)); break; case ISD::FRINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_PPCF128)); + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128)); break; case ISD::FNEARBYINT: Results.push_back(ExpandFPLibCall(Node, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128)); break; case ISD::FPOWI: Results.push_back(ExpandFPLibCall(Node, RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_PPCF128)); + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128)); break; case ISD::FPOW: Results.push_back(ExpandFPLibCall(Node, RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_PPCF128)); + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128)); break; case ISD::FDIV: Results.push_back(ExpandFPLibCall(Node, RTLIB::DIV_F32, RTLIB::DIV_F64, - RTLIB::DIV_F80, RTLIB::DIV_PPCF128)); + RTLIB::DIV_F80, RTLIB::DIV_F128, + RTLIB::DIV_PPCF128)); break; case ISD::FREM: Results.push_back(ExpandFPLibCall(Node, RTLIB::REM_F32, RTLIB::REM_F64, - RTLIB::REM_F80, RTLIB::REM_PPCF128)); + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128)); break; case ISD::FMA: Results.push_back(ExpandFPLibCall(Node, RTLIB::FMA_F32, RTLIB::FMA_F64, - RTLIB::FMA_F80, RTLIB::FMA_PPCF128)); + RTLIB::FMA_F80, RTLIB::FMA_F128, + RTLIB::FMA_PPCF128)); break; case ISD::FP16_TO_FP32: Results.push_back(ExpandLibCall(RTLIB::FPEXT_F16_F32, Node, false)); @@ -3158,7 +3304,6 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { case ISD::UREM: case ISD::SREM: { EVT VT = Node->getValueType(0); - SDVTList VTs = DAG.getVTList(VT, VT); bool isSigned = Node->getOpcode() == ISD::SREM; unsigned DivOpc = isSigned ? ISD::SDIV : ISD::UDIV; unsigned DivRemOpc = isSigned ? 
ISD::SDIVREM : ISD::UDIVREM; @@ -3169,6 +3314,7 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // If div is legal, it's better to do the normal expansion !TLI.isOperationLegalOrCustom(DivOpc, Node->getValueType(0)) && useDivRem(Node, isSigned, false))) { + SDVTList VTs = DAG.getVTList(VT, VT); Tmp1 = DAG.getNode(DivRemOpc, dl, VTs, Tmp2, Tmp3).getValue(1); } else if (TLI.isOperationLegalOrCustom(DivOpc, VT)) { // X % Y -> X-X/Y*Y @@ -3486,8 +3632,19 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { // Otherwise, SETCC for the given comparison type must be completely // illegal; expand it into a SELECT_CC. EVT VT = Node->getValueType(0); + int TrueValue; + switch (TLI.getBooleanContents(VT.isVector())) { + case TargetLowering::ZeroOrOneBooleanContent: + case TargetLowering::UndefinedBooleanContent: + TrueValue = 1; + break; + case TargetLowering::ZeroOrNegativeOneBooleanContent: + TrueValue = -1; + break; + } Tmp1 = DAG.getNode(ISD::SELECT_CC, dl, VT, Tmp1, Tmp2, - DAG.getConstant(1, VT), DAG.getConstant(0, VT), Tmp3); + DAG.getConstant(TrueValue, VT), DAG.getConstant(0, VT), + Tmp3); Results.push_back(Tmp1); break; } @@ -3575,13 +3732,13 @@ void SelectionDAGLegalize::ExpandNode(SDNode *Node) { void SelectionDAGLegalize::PromoteNode(SDNode *Node) { SmallVector Results; - EVT OVT = Node->getValueType(0); + MVT OVT = Node->getSimpleValueType(0); if (Node->getOpcode() == ISD::UINT_TO_FP || Node->getOpcode() == ISD::SINT_TO_FP || Node->getOpcode() == ISD::SETCC) { - OVT = Node->getOperand(0).getValueType(); + OVT = Node->getOperand(0).getSimpleValueType(); } - EVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); + MVT NVT = TLI.getTypeToPromoteTo(Node->getOpcode(), OVT); DebugLoc dl = Node->getDebugLoc(); SDValue Tmp1, Tmp2, Tmp3; switch (Node->getOpcode()) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 92dc5a9831b6..de217d8571ff 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -29,11 +29,13 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, RTLIB::Libcall Call_F32, RTLIB::Libcall Call_F64, RTLIB::Libcall Call_F80, + RTLIB::Libcall Call_F128, RTLIB::Libcall Call_PPCF128) { return VT == MVT::f32 ? Call_F32 : VT == MVT::f64 ? Call_F64 : VT == MVT::f80 ? Call_F80 : + VT == MVT::f128 ? Call_F128 : VT == MVT::ppcf128 ? 
Call_PPCF128 : RTLIB::UNKNOWN_LIBCALL; } @@ -152,23 +154,25 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FADD(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::ADD_F32, - RTLIB::ADD_F64, - RTLIB::ADD_F80, - RTLIB::ADD_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::ADD_F32, + RTLIB::ADD_F64, + RTLIB::ADD_F80, + RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::CEIL_F32, - RTLIB::CEIL_F64, - RTLIB::CEIL_F80, - RTLIB::CEIL_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::CEIL_F32, + RTLIB::CEIL_F64, + RTLIB::CEIL_F80, + RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { @@ -216,90 +220,98 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FCOS(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::COS_F32, - RTLIB::COS_F64, - RTLIB::COS_F80, - RTLIB::COS_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::COS_F32, + RTLIB::COS_F64, + RTLIB::COS_F80, + RTLIB::COS_F128, + RTLIB::COS_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FDIV(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::EXP_F32, - RTLIB::EXP_F64, - RTLIB::EXP_F80, - RTLIB::EXP_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP_F32, + RTLIB::EXP_F64, + RTLIB::EXP_F80, + RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FEXP2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::EXP2_F32, - RTLIB::EXP2_F64, - RTLIB::EXP2_F80, - RTLIB::EXP2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::EXP2_F32, + 
RTLIB::EXP2_F64, + RTLIB::EXP2_F80, + RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FFLOOR(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32, - RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80, - RTLIB::FLOOR_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FLOOR_F32, + RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, + RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG_F32, - RTLIB::LOG_F64, - RTLIB::LOG_F80, - RTLIB::LOG_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG_F32, + RTLIB::LOG_F64, + RTLIB::LOG_F80, + RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG2(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG2_F32, - RTLIB::LOG2_F64, - RTLIB::LOG2_F80, - RTLIB::LOG2_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG2_F32, + RTLIB::LOG2_F64, + RTLIB::LOG2_F80, + RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FLOG10(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32, - RTLIB::LOG10_F64, - RTLIB::LOG10_F80, - RTLIB::LOG10_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::LOG10_F32, + RTLIB::LOG10_F64, + RTLIB::LOG10_F80, + RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { @@ -307,35 +319,38 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FMA(SDNode *N) { SDValue Ops[3] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)), GetSoftenedFloat(N->getOperand(2)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_PPCF128), - NVT, Ops, 3, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + NVT, Ops, 3, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FMUL(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + 
RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::NEARBYINT_F32, - RTLIB::NEARBYINT_F64, - RTLIB::NEARBYINT_F80, - RTLIB::NEARBYINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::NEARBYINT_F32, + RTLIB::NEARBYINT_F64, + RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, + RTLIB::NEARBYINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { @@ -343,12 +358,13 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { // Expand Y = FNEG(X) -> Y = SUB -0.0, X SDValue Ops[2] = { DAG.getConstantFP(-0.0, N->getValueType(0)), GetSoftenedFloat(N->getOperand(0)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { @@ -356,7 +372,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPEXT(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_EXTEND!"); - return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); } // FIXME: Should we just use 'normal' FP_EXTEND / FP_TRUNC instead of special @@ -364,8 +380,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_EXTEND(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatRes_FP16_TO_FP32(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = N->getOperand(0); - return MakeLibCall(RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, - N->getDebugLoc()); + return TLI.makeLibCall(DAG, RTLIB::FPEXT_F16_F32, NVT, &Op, 1, false, + N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { @@ -373,19 +389,20 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FP_ROUND(SDNode *N) { SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPROUND(Op.getValueType(), N->getValueType(0)); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND!"); - return MakeLibCall(LC, NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOW(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::POW_F32, - RTLIB::POW_F64, - RTLIB::POW_F80, - RTLIB::POW_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POW_F32, + RTLIB::POW_F64, + RTLIB::POW_F80, + RTLIB::POW_F128, + RTLIB::POW_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { @@ -393,80 +410,87 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FPOWI(SDNode *N) { 
"Unsupported power type!"); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), N->getOperand(1) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::POWI_F32, - RTLIB::POWI_F64, - RTLIB::POWI_F80, - RTLIB::POWI_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::POWI_F32, + RTLIB::POWI_F64, + RTLIB::POWI_F80, + RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FREM(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::REM_F32, - RTLIB::REM_F64, - RTLIB::REM_F80, - RTLIB::REM_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, + RTLIB::REM_F64, + RTLIB::REM_F80, + RTLIB::REM_F128, + RTLIB::REM_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FRINT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::RINT_F32, - RTLIB::RINT_F64, - RTLIB::RINT_F80, - RTLIB::RINT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::RINT_F32, + RTLIB::RINT_F64, + RTLIB::RINT_F80, + RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSIN(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SIN_F32, - RTLIB::SIN_F64, - RTLIB::SIN_F80, - RTLIB::SIN_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SIN_F32, + RTLIB::SIN_F64, + RTLIB::SIN_F80, + RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSQRT(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SQRT_F32, - RTLIB::SQRT_F64, - RTLIB::SQRT_F80, - RTLIB::SQRT_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SQRT_F32, + RTLIB::SQRT_F64, + RTLIB::SQRT_F80, + RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FSUB(SDNode *N) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Ops[2] = { GetSoftenedFloat(N->getOperand(0)), GetSoftenedFloat(N->getOperand(1)) }; - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - NVT, Ops, 2, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + NVT, Ops, 2, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { EVT NVT = 
TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::TRUNC_F32, - RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, - RTLIB::TRUNC_PPCF128), - NVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::TRUNC_F32, + RTLIB::TRUNC_F64, + RTLIB::TRUNC_F80, + RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), + NVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { @@ -559,8 +583,9 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { // Sign/zero extend the argument if the libcall takes a larger type. SDValue Op = DAG.getNode(Signed ? ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, NVT, N->getOperand(0)); - return MakeLibCall(LC, TLI.getTypeToTransformTo(*DAG.getContext(), RVT), - &Op, 1, false, dl); + return TLI.makeLibCall(DAG, LC, + TLI.getTypeToTransformTo(*DAG.getContext(), RVT), + &Op, 1, false, dl); } @@ -607,92 +632,6 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } -/// SoftenSetCCOperands - Soften the operands of a comparison. This code is -/// shared among BR_CC, SELECT_CC, and SETCC handlers. -void DAGTypeLegalizer::SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl) { - SDValue LHSInt = GetSoftenedFloat(NewLHS); - SDValue RHSInt = GetSoftenedFloat(NewRHS); - EVT VT = NewLHS.getValueType(); - - assert((VT == MVT::f32 || VT == MVT::f64) && "Unsupported setcc type!"); - - // Expand into one or more soft-fp libcall(s). - RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; - switch (CCCode) { - case ISD::SETEQ: - case ISD::SETOEQ: - LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; - break; - case ISD::SETNE: - case ISD::SETUNE: - LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : RTLIB::UNE_F64; - break; - case ISD::SETGE: - case ISD::SETOGE: - LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; - break; - case ISD::SETLT: - case ISD::SETOLT: - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - break; - case ISD::SETLE: - case ISD::SETOLE: - LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; - break; - case ISD::SETGT: - case ISD::SETOGT: - LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; - break; - case ISD::SETUO: - LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; - break; - case ISD::SETO: - LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : RTLIB::O_F64; - break; - default: - LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : RTLIB::UO_F64; - switch (CCCode) { - case ISD::SETONE: - // SETONE = SETOLT | SETOGT - LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - // Fallthrough - case ISD::SETUGT: - LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : RTLIB::OGT_F64; - break; - case ISD::SETUGE: - LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : RTLIB::OGE_F64; - break; - case ISD::SETULT: - LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : RTLIB::OLT_F64; - break; - case ISD::SETULE: - LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : RTLIB::OLE_F64; - break; - case ISD::SETUEQ: - LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : RTLIB::OEQ_F64; - break; - default: llvm_unreachable("Do not know how to soften this setcc!"); - } - } - - // Use the target specific return value for comparions lib calls. 
- EVT RetVT = TLI.getCmpLibcallReturnType(); - SDValue Ops[2] = { LHSInt, RHSInt }; - NewLHS = MakeLibCall(LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewRHS = DAG.getConstant(0, RetVT); - CCCode = TLI.getCmpLibcallCC(LC1); - if (LC2 != RTLIB::UNKNOWN_LIBCALL) { - SDValue Tmp = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), - NewLHS, NewRHS, DAG.getCondCode(CCCode)); - NewLHS = MakeLibCall(LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); - NewLHS = DAG.getNode(ISD::SETCC, dl, TLI.getSetCCResultType(RetVT), NewLHS, - NewRHS, DAG.getCondCode(TLI.getCmpLibcallCC(LC2))); - NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); - NewRHS = SDValue(); - } -} - SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, N->getDebugLoc(), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); @@ -706,15 +645,19 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_ROUND(SDNode *N) { assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_ROUND libcall"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_BR_CC(SDNode *N) { SDValue NewLHS = N->getOperand(2), NewRHS = N->getOperand(3); ISD::CondCode CCCode = cast(N->getOperand(1))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, we need to compare the result + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. 
if (NewRHS.getNode() == 0) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); @@ -733,7 +676,7 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { @@ -741,22 +684,26 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_FP32_TO_FP16(SDNode *N) { EVT RVT = N->getValueType(0); RTLIB::Libcall LC = RTLIB::FPROUND_F32_F16; SDValue Op = GetSoftenedFloat(N->getOperand(0)); - return MakeLibCall(LC, RVT, &Op, 1, false, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, RVT, &Op, 1, false, N->getDebugLoc()); } SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(4))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, we need to compare the result + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, we need to compare the result // against zero to select between true and false values. if (NewRHS.getNode() == 0) { NewRHS = DAG.getConstant(0, NewLHS.getValueType()); @@ -773,9 +720,13 @@ SDValue DAGTypeLegalizer::SoftenFloatOp_SELECT_CC(SDNode *N) { SDValue DAGTypeLegalizer::SoftenFloatOp_SETCC(SDNode *N) { SDValue NewLHS = N->getOperand(0), NewRHS = N->getOperand(1); ISD::CondCode CCCode = cast(N->getOperand(2))->get(); - SoftenSetCCOperands(NewLHS, NewRHS, CCCode, N->getDebugLoc()); - // If SoftenSetCCOperands returned a scalar, use it. + EVT VT = NewLHS.getValueType(); + NewLHS = GetSoftenedFloat(NewLHS); + NewRHS = GetSoftenedFloat(NewRHS); + TLI.softenSetCCOperands(DAG, VT, NewLHS, NewRHS, CCCode, N->getDebugLoc()); + + // If softenSetCCOperands returned a scalar, use it. if (NewRHS.getNode() == 0) { assert(NewLHS.getValueType() == N->getValueType(0) && "Unexpected setcc expansion!"); @@ -873,6 +824,7 @@ void DAGTypeLegalizer::ExpandFloatResult(SDNode *N, unsigned ResNo) { case ISD::LOAD: ExpandFloatRes_LOAD(N, Lo, Hi); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: ExpandFloatRes_XINT_TO_FP(N, Lo, Hi); break; + case ISD::FREM: ExpandFloatRes_FREM(N, Lo, Hi); break; } // If Lo/Hi is null, the sub-method took care of registering results etc. 
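The SoftenFloatOp_BR_CC/SELECT_CC/SETCC hunks above now soften both operands and defer to TLI.softenSetCCOperands, which turns the floating-point comparison into an integer libcall; when a single libcall suffices it hands back a scalar, and the caller compares that scalar against zero to recover the boolean. A minimal sketch of that shape follows; the helper name and its trivial body are placeholders for an RTLIB-style comparison routine, not a real compiler-rt entry point.

    #include <cstdio>

    // Placeholder for the soft-float "ordered less than" libcall the
    // legalizer would emit.  A real soft-float runtime computes this with
    // integer arithmetic on the operands' bit patterns.
    static int softFloatLessThan(float A, float B) {
      return A < B ? 1 : 0;
    }

    // What a softened SETCC (setolt) lowers to: the libcall, then an integer
    // compare of the returned scalar against the constant zero.
    bool loweredSetccOLT(float A, float B) {
      int R = softFloatLessThan(A, B);  // the libcall
      return R != 0;                    // integer SETCC against 0
    }

    int main() {
      std::printf("%d\n", loweredSetccOLT(1.0f, 2.0f));
    }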
@@ -886,9 +838,11 @@ void DAGTypeLegalizer::ExpandFloatRes_ConstantFP(SDNode *N, SDValue &Lo, assert(NVT.getSizeInBits() == integerPartWidth && "Do not know how to expand this float constant!"); APInt C = cast(N)->getValueAPF().bitcastToAPInt(); - Lo = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[1])), + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[1])), NVT); - Hi = DAG.getConstantFP(APFloat(APInt(integerPartWidth, C.getRawData()[0])), + Hi = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(integerPartWidth, C.getRawData()[0])), NVT); } @@ -910,7 +864,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FADD(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::ADD_F32, RTLIB::ADD_F64, - RTLIB::ADD_F80, RTLIB::ADD_PPCF128), + RTLIB::ADD_F80, RTLIB::ADD_F128, + RTLIB::ADD_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -919,7 +874,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCEIL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::CEIL_F32, RTLIB::CEIL_F64, - RTLIB::CEIL_F80, RTLIB::CEIL_PPCF128), + RTLIB::CEIL_F80, RTLIB::CEIL_F128, + RTLIB::CEIL_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -930,6 +886,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOPYSIGN(SDNode *N, RTLIB::COPYSIGN_F32, RTLIB::COPYSIGN_F64, RTLIB::COPYSIGN_F80, + RTLIB::COPYSIGN_F128, RTLIB::COPYSIGN_PPCF128), N, false); GetPairElements(Call, Lo, Hi); @@ -939,7 +896,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::COS_F32, RTLIB::COS_F64, - RTLIB::COS_F80, RTLIB::COS_PPCF128), + RTLIB::COS_F80, RTLIB::COS_F128, + RTLIB::COS_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -947,13 +905,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FCOS(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FDIV(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::DIV_F32, - RTLIB::DIV_F64, - RTLIB::DIV_F80, - RTLIB::DIV_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::DIV_F32, + RTLIB::DIV_F64, + RTLIB::DIV_F80, + RTLIB::DIV_F128, + RTLIB::DIV_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -961,7 +920,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::EXP_F32, RTLIB::EXP_F64, - RTLIB::EXP_F80, RTLIB::EXP_PPCF128), + RTLIB::EXP_F80, RTLIB::EXP_F128, + RTLIB::EXP_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -970,7 +930,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::EXP2_F32, RTLIB::EXP2_F64, - RTLIB::EXP2_F80, RTLIB::EXP2_PPCF128), + RTLIB::EXP2_F80, RTLIB::EXP2_F128, + RTLIB::EXP2_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -978,8 +939,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FEXP2(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FFLOOR(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::FLOOR_F32,RTLIB::FLOOR_F64, - RTLIB::FLOOR_F80,RTLIB::FLOOR_PPCF128), + RTLIB::FLOOR_F32, 
RTLIB::FLOOR_F64, + RTLIB::FLOOR_F80, RTLIB::FLOOR_F128, + RTLIB::FLOOR_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -988,7 +950,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::LOG_F32, RTLIB::LOG_F64, - RTLIB::LOG_F80, RTLIB::LOG_PPCF128), + RTLIB::LOG_F80, RTLIB::LOG_F128, + RTLIB::LOG_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -997,7 +960,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::LOG2_F32, RTLIB::LOG2_F64, - RTLIB::LOG2_F80, RTLIB::LOG2_PPCF128), + RTLIB::LOG2_F80, RTLIB::LOG2_F128, + RTLIB::LOG2_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1005,8 +969,9 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG2(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), - RTLIB::LOG10_F32,RTLIB::LOG10_F64, - RTLIB::LOG10_F80,RTLIB::LOG10_PPCF128), + RTLIB::LOG10_F32, RTLIB::LOG10_F64, + RTLIB::LOG10_F80, RTLIB::LOG10_F128, + RTLIB::LOG10_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1014,26 +979,28 @@ void DAGTypeLegalizer::ExpandFloatRes_FLOG10(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FMA(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[3] = { N->getOperand(0), N->getOperand(1), N->getOperand(2) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::FMA_F32, - RTLIB::FMA_F64, - RTLIB::FMA_F80, - RTLIB::FMA_PPCF128), - N->getValueType(0), Ops, 3, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::FMA_F32, + RTLIB::FMA_F64, + RTLIB::FMA_F80, + RTLIB::FMA_F128, + RTLIB::FMA_PPCF128), + N->getValueType(0), Ops, 3, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } void DAGTypeLegalizer::ExpandFloatRes_FMUL(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::MUL_F32, - RTLIB::MUL_F64, - RTLIB::MUL_F80, - RTLIB::MUL_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::MUL_F32, + RTLIB::MUL_F64, + RTLIB::MUL_F80, + RTLIB::MUL_F128, + RTLIB::MUL_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1043,6 +1010,7 @@ void DAGTypeLegalizer::ExpandFloatRes_FNEARBYINT(SDNode *N, RTLIB::NEARBYINT_F32, RTLIB::NEARBYINT_F64, RTLIB::NEARBYINT_F80, + RTLIB::NEARBYINT_F128, RTLIB::NEARBYINT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); @@ -1060,14 +1028,16 @@ void DAGTypeLegalizer::ExpandFloatRes_FP_EXTEND(SDNode *N, SDValue &Lo, SDValue &Hi) { EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); Hi = DAG.getNode(ISD::FP_EXTEND, N->getDebugLoc(), NVT, N->getOperand(0)); - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); } void DAGTypeLegalizer::ExpandFloatRes_FPOW(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::POW_F32, RTLIB::POW_F64, - RTLIB::POW_F80, RTLIB::POW_PPCF128), + RTLIB::POW_F80, RTLIB::POW_F128, + RTLIB::POW_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1076,7 +1046,18 @@ void 
DAGTypeLegalizer::ExpandFloatRes_FPOWI(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::POWI_F32, RTLIB::POWI_F64, - RTLIB::POWI_F80, RTLIB::POWI_PPCF128), + RTLIB::POWI_F80, RTLIB::POWI_F128, + RTLIB::POWI_PPCF128), + N, false); + GetPairElements(Call, Lo, Hi); +} + +void DAGTypeLegalizer::ExpandFloatRes_FREM(SDNode *N, + SDValue &Lo, SDValue &Hi) { + SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), + RTLIB::REM_F32, RTLIB::REM_F64, + RTLIB::REM_F80, RTLIB::REM_F128, + RTLIB::REM_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1085,7 +1066,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FRINT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::RINT_F32, RTLIB::RINT_F64, - RTLIB::RINT_F80, RTLIB::RINT_PPCF128), + RTLIB::RINT_F80, RTLIB::RINT_F128, + RTLIB::RINT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1094,7 +1076,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSIN(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::SIN_F32, RTLIB::SIN_F64, - RTLIB::SIN_F80, RTLIB::SIN_PPCF128), + RTLIB::SIN_F80, RTLIB::SIN_F128, + RTLIB::SIN_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1103,7 +1086,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::SQRT_F32, RTLIB::SQRT_F64, - RTLIB::SQRT_F80, RTLIB::SQRT_PPCF128), + RTLIB::SQRT_F80, RTLIB::SQRT_F128, + RTLIB::SQRT_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1111,13 +1095,14 @@ void DAGTypeLegalizer::ExpandFloatRes_FSQRT(SDNode *N, void DAGTypeLegalizer::ExpandFloatRes_FSUB(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SDValue Call = MakeLibCall(GetFPLibCall(N->getValueType(0), - RTLIB::SUB_F32, - RTLIB::SUB_F64, - RTLIB::SUB_F80, - RTLIB::SUB_PPCF128), - N->getValueType(0), Ops, 2, false, - N->getDebugLoc()); + SDValue Call = TLI.makeLibCall(DAG, GetFPLibCall(N->getValueType(0), + RTLIB::SUB_F32, + RTLIB::SUB_F64, + RTLIB::SUB_F80, + RTLIB::SUB_F128, + RTLIB::SUB_PPCF128), + N->getValueType(0), Ops, 2, false, + N->getDebugLoc()); GetPairElements(Call, Lo, Hi); } @@ -1125,7 +1110,8 @@ void DAGTypeLegalizer::ExpandFloatRes_FTRUNC(SDNode *N, SDValue &Lo, SDValue &Hi) { SDValue Call = LibCallify(GetFPLibCall(N->getValueType(0), RTLIB::TRUNC_F32, RTLIB::TRUNC_F64, - RTLIB::TRUNC_F80, RTLIB::TRUNC_PPCF128), + RTLIB::TRUNC_F80, RTLIB::TRUNC_F128, + RTLIB::TRUNC_PPCF128), N, false); GetPairElements(Call, Lo, Hi); } @@ -1155,7 +1141,8 @@ void DAGTypeLegalizer::ExpandFloatRes_LOAD(SDNode *N, SDValue &Lo, Chain = Hi.getValue(1); // The low part is zero. - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); // Modified the chain - switch anything that used the old chain to use the // new one. @@ -1179,7 +1166,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, // The integer can be represented exactly in an f64. Src = DAG.getNode(isSigned ? 
ISD::SIGN_EXTEND : ISD::ZERO_EXTEND, dl, MVT::i32, Src); - Lo = DAG.getConstantFP(APFloat(APInt(NVT.getSizeInBits(), 0)), NVT); + Lo = DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(NVT), + APInt(NVT.getSizeInBits(), 0)), NVT); Hi = DAG.getNode(ISD::SINT_TO_FP, dl, NVT, Src); } else { RTLIB::Libcall LC = RTLIB::UNKNOWN_LIBCALL; @@ -1193,7 +1181,7 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported XINT_TO_FP!"); - Hi = MakeLibCall(LC, VT, &Src, 1, true, dl); + Hi = TLI.makeLibCall(DAG, LC, VT, &Src, 1, true, dl); GetPairElements(Hi, Lo, Hi); } @@ -1225,7 +1213,8 @@ void DAGTypeLegalizer::ExpandFloatRes_XINT_TO_FP(SDNode *N, SDValue &Lo, } Lo = DAG.getNode(ISD::FADD, dl, VT, Hi, - DAG.getConstantFP(APFloat(APInt(128, Parts)), + DAG.getConstantFP(APFloat(APFloat::PPCDoubleDouble, + APInt(128, Parts)), MVT::ppcf128)); Lo = DAG.getNode(ISD::SELECT_CC, dl, VT, Src, DAG.getConstant(0, SrcVT), Lo, Hi, DAG.getCondCode(ISD::SETLT)); @@ -1364,7 +1353,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_SINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOSINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_SINT!"); - return MakeLibCall(LC, RVT, &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, RVT, &N->getOperand(0), 1, false, dl); } SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { @@ -1377,7 +1366,7 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { assert(N->getOperand(0).getValueType() == MVT::ppcf128 && "Logic only correct for ppcf128!"); const uint64_t TwoE31[] = {0x41e0000000000000LL, 0}; - APFloat APF = APFloat(APInt(128, TwoE31)); + APFloat APF = APFloat(APFloat::PPCDoubleDouble, APInt(128, TwoE31)); SDValue Tmp = DAG.getConstantFP(APF, MVT::ppcf128); // X>=2^31 ? (int)(X-2^31)+0x80000000 : (int)X // FIXME: generated code sucks. @@ -1396,7 +1385,8 @@ SDValue DAGTypeLegalizer::ExpandFloatOp_FP_TO_UINT(SDNode *N) { RTLIB::Libcall LC = RTLIB::getFPTOUINT(N->getOperand(0).getValueType(), RVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported FP_TO_UINT!"); - return MakeLibCall(LC, N->getValueType(0), &N->getOperand(0), 1, false, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &N->getOperand(0), 1, + false, dl); } SDValue DAGTypeLegalizer::ExpandFloatOp_SELECT_CC(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp index a370faeb2399..d19c13b8ff13 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeIntegerTypes.cpp @@ -19,7 +19,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/DerivedTypes.h" +#include "llvm/IR/DerivedTypes.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -515,7 +515,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { // Only use the result of getSetCCResultType if it is legal, // otherwise just use the promoted result type (NVT). 
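The APFloat constructions rewritten in the preceding hunks now pass the target format (DAG.EVTToAPFloatSemantics, or APFloat::PPCDoubleDouble explicitly) alongside the raw APInt, because a bit pattern by itself does not say which floating-point format it encodes. A plain C++ illustration of that point, with no LLVM types involved:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint64_t bits = 0x3FF0000000000000ULL;  // 1.0 when read as IEEE-754 binary64

  double asF64;
  std::memcpy(&asF64, &bits, sizeof asF64);
  assert(asF64 == 1.0);

  // The same data interpreted with different semantics gives a different
  // value: the low 32 bits, read as binary32, are +0.0, not 1.0.
  uint32_t low = static_cast<uint32_t>(bits);
  float asF32;
  std::memcpy(&asF32, &low, sizeof asF32);
  assert(asF32 == 0.0f);
  return 0;
}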
if (!TLI.isTypeLegal(SVT)) - SVT = NVT; + SVT = NVT; DebugLoc dl = N->getDebugLoc(); assert(SVT.isVector() == N->getOperand(0).getValueType().isVector() && @@ -531,9 +531,10 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SETCC(SDNode *N) { } SDValue DAGTypeLegalizer::PromoteIntRes_SHL(SDNode *N) { - return DAG.getNode(ISD::SHL, N->getDebugLoc(), - TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)), - GetPromotedInteger(N->getOperand(0)), N->getOperand(1)); + SDValue Res = GetPromotedInteger(N->getOperand(0)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SHL, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SIGN_EXTEND_INREG(SDNode *N) { @@ -549,22 +550,23 @@ SDValue DAGTypeLegalizer::PromoteIntRes_SimpleIntBinOp(SDNode *N) { SDValue LHS = GetPromotedInteger(N->getOperand(0)); SDValue RHS = GetPromotedInteger(N->getOperand(1)); return DAG.getNode(N->getOpcode(), N->getDebugLoc(), - LHS.getValueType(), LHS, RHS); + LHS.getValueType(), LHS, RHS); } SDValue DAGTypeLegalizer::PromoteIntRes_SRA(SDNode *N) { // The input value must be properly sign extended. SDValue Res = SExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SRA, N->getDebugLoc(), - Res.getValueType(), Res, N->getOperand(1)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRA, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_SRL(SDNode *N) { // The input value must be properly zero extended. - EVT VT = N->getValueType(0); - EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); SDValue Res = ZExtPromotedInteger(N->getOperand(0)); - return DAG.getNode(ISD::SRL, N->getDebugLoc(), NVT, Res, N->getOperand(1)); + SDValue Amt = N->getOperand(1); + Amt = Amt.getValueType().isVector() ? ZExtPromotedInteger(Amt) : Amt; + return DAG.getNode(ISD::SRL, N->getDebugLoc(), Res.getValueType(), Res, Amt); } SDValue DAGTypeLegalizer::PromoteIntRes_TRUNCATE(SDNode *N) { @@ -703,7 +705,7 @@ SDValue DAGTypeLegalizer::PromoteIntRes_VAARG(SDNode *N) { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); - EVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); + MVT RegVT = TLI.getRegisterType(*DAG.getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), VT); // The argument is passed as NumRegs registers of type RegVT. 
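The PromoteIntRes_SHL/SRA/SRL rewrites above rely on the promoted operand carrying the right extension: zero-extension before a logical shift, sign-extension before an arithmetic one, which is what ZExtPromotedInteger and SExtPromotedInteger guarantee (and the shift amount itself is now zero-extended when it comes from a promoted vector). The same requirement on plain scalars, as a standalone sketch:

#include <cassert>
#include <cstdint>

int main() {
  int8_t v = -2;          // 0xFE
  unsigned amt = 1;

  // SRL in the promoted type: zero-extend, shift, truncate back to 8 bits.
  uint32_t zext = static_cast<uint8_t>(v);            // 0x000000FE
  uint8_t srl = static_cast<uint8_t>(zext >> amt);
  assert(srl == 0x7F);

  // SRA in the promoted type: sign-extend, shift arithmetically, truncate.
  // (>> on a negative int is arithmetic on mainstream two's-complement ABIs.)
  int32_t sext = v;                                    // 0xFFFFFFFE
  uint8_t sra = static_cast<uint8_t>(sext >> amt);
  assert(sra == 0xFF);
  return 0;
}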
@@ -1767,7 +1769,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_SINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOSINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-sint conversion!"); - SplitInteger(MakeLibCall(LC, VT, &Op, 1, true/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, true/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, @@ -1777,7 +1780,8 @@ void DAGTypeLegalizer::ExpandIntRes_FP_TO_UINT(SDNode *N, SDValue &Lo, SDValue Op = N->getOperand(0); RTLIB::Libcall LC = RTLIB::getFPTOUINT(Op.getValueType(), VT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unexpected fp-to-uint conversion!"); - SplitInteger(MakeLibCall(LC, VT, &Op, 1, false/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, &Op, 1, false/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_LOAD(LoadSDNode *N, @@ -1992,7 +1996,8 @@ void DAGTypeLegalizer::ExpandIntRes_MUL(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported MUL!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true/*irrelevant*/, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true/*irrelevant*/, dl), + Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_SADDSUBO(SDNode *Node, @@ -2054,7 +2059,7 @@ void DAGTypeLegalizer::ExpandIntRes_SDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, @@ -2092,9 +2097,20 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, // Expand the subcomponents. SDValue LHSL, LHSH; GetExpandedInteger(N->getOperand(0), LHSL, LHSH); - - SDValue Ops[] = { LHSL, LHSH, N->getOperand(1) }; EVT VT = LHSL.getValueType(); + + // If the shift amount operand is coming from a vector legalization it may + // have an illegal type. Fix that first by casting the operand, otherwise + // the new SHL_PARTS operation would need further legalization. 
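The comment above is about feeding the parts node (PartsOpc) a shift amount of the target's legal shift-amount type; the amount only needs about log2(bit-width) bits, per the Log2_32_Ceil assert, so the getZExtOrTrunc cast is lossless. A standalone sketch of the double-width shift such a parts operation stands for; shl128 is an illustrative helper, not LLVM code:

#include <cassert>
#include <cstdint>

// A 128-bit left shift expressed on the two expanded 64-bit halves, the kind
// of operation the parts node computes for the Lo/Hi pair.
static void shl128(uint64_t lo, uint64_t hi, unsigned amt,
                   uint64_t &outLo, uint64_t &outHi) {
  assert(amt < 128 && "amount must be smaller than the full bit width");
  if (amt == 0) {
    outLo = lo; outHi = hi;
  } else if (amt < 64) {
    outLo = lo << amt;
    outHi = (hi << amt) | (lo >> (64 - amt));
  } else {
    outLo = 0;
    outHi = lo << (amt - 64);
  }
}

int main() {
  uint64_t lo, hi;
  shl128(/*lo=*/1, /*hi=*/0, 65, lo, hi);
  assert(lo == 0 && hi == 2);   // 1 << 65 == 2^65
  return 0;
}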
+ SDValue ShiftOp = N->getOperand(1); + EVT ShiftTy = TLI.getShiftAmountTy(VT); + assert(ShiftTy.getScalarType().getSizeInBits() >= + Log2_32_Ceil(VT.getScalarType().getSizeInBits()) && + "ShiftAmountTy is too small to cover the range of this type!"); + if (ShiftOp.getValueType() != ShiftTy) + ShiftOp = DAG.getZExtOrTrunc(ShiftOp, dl, ShiftTy); + + SDValue Ops[] = { LHSL, LHSH, ShiftOp }; Lo = DAG.getNode(PartsOpc, dl, DAG.getVTList(VT, VT), Ops, 3); Hi = Lo.getValue(1); return; @@ -2138,7 +2154,7 @@ void DAGTypeLegalizer::ExpandIntRes_Shift(SDNode *N, if (LC != RTLIB::UNKNOWN_LIBCALL && TLI.getLibcallName(LC)) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, isSigned, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, isSigned, dl), Lo, Hi); return; } @@ -2221,7 +2237,7 @@ void DAGTypeLegalizer::ExpandIntRes_SREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported SREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, true, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, true, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_TRUNCATE(SDNode *N, @@ -2361,7 +2377,7 @@ void DAGTypeLegalizer::ExpandIntRes_UDIV(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UDIV!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, @@ -2381,7 +2397,7 @@ void DAGTypeLegalizer::ExpandIntRes_UREM(SDNode *N, assert(LC != RTLIB::UNKNOWN_LIBCALL && "Unsupported UREM!"); SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - SplitInteger(MakeLibCall(LC, VT, Ops, 2, false, dl), Lo, Hi); + SplitInteger(TLI.makeLibCall(DAG, LC, VT, Ops, 2, false, dl), Lo, Hi); } void DAGTypeLegalizer::ExpandIntRes_ZERO_EXTEND(SDNode *N, @@ -2549,7 +2565,7 @@ void DAGTypeLegalizer::IntegerExpandSetCCOperands(SDValue &NewLHS, // NOTE: on targets without efficient SELECT of bools, we can always use // this identity: (B1 ? 
B2 : B3) --> (B1 & B2)|(!B1&B3) - TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, false, true, true, NULL); + TargetLowering::DAGCombinerInfo DagCombineInfo(DAG, AfterLegalizeTypes, true, NULL); SDValue Tmp1, Tmp2; Tmp1 = TLI.SimplifySetCC(TLI.getSetCCResultType(LHSLo.getValueType()), LHSLo, RHSLo, LowCC, false, DagCombineInfo, dl); @@ -2668,7 +2684,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_SINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getSINTTOFP(Op.getValueType(), DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this SINT_TO_FP!"); - return MakeLibCall(LC, DstVT, &Op, 1, true, N->getDebugLoc()); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, N->getDebugLoc()); } SDValue DAGTypeLegalizer::ExpandIntOp_STORE(StoreSDNode *N, unsigned OpNo) { @@ -2764,17 +2780,6 @@ SDValue DAGTypeLegalizer::ExpandIntOp_TRUNCATE(SDNode *N) { return DAG.getNode(ISD::TRUNCATE, N->getDebugLoc(), N->getValueType(0), InL); } -static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown FP format"); - case MVT::f32: return &APFloat::IEEEsingle; - case MVT::f64: return &APFloat::IEEEdouble; - case MVT::f80: return &APFloat::x87DoubleExtended; - case MVT::f128: return &APFloat::IEEEquad; - case MVT::ppcf128: return &APFloat::PPCDoubleDouble; - } -} - SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { SDValue Op = N->getOperand(0); EVT SrcVT = Op.getValueType(); @@ -2784,8 +2789,8 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { // The following optimization is valid only if every value in SrcVT (when // treated as signed) is representable in DstVT. Check that the mantissa // size of DstVT is >= than the number of bits in SrcVT -1. - const fltSemantics *sem = EVTToAPFloatSemantics(DstVT); - if (APFloat::semanticsPrecision(*sem) >= SrcVT.getSizeInBits()-1 && + const fltSemantics &sem = DAG.EVTToAPFloatSemantics(DstVT); + if (APFloat::semanticsPrecision(sem) >= SrcVT.getSizeInBits()-1 && TLI.getOperationAction(ISD::SINT_TO_FP, SrcVT) == TargetLowering::Custom){ // Do a signed conversion then adjust the result. 
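The guard above (semanticsPrecision(sem) >= SrcVT bits - 1) enables the usual trick of implementing an unsigned-to-FP conversion as a signed conversion plus a 2^N fix-up whenever the input's sign bit was set. A standalone version for u32 to double; u32_to_double is an illustrative helper, and the uint-to-int cast assumes the usual two's-complement wrap:

#include <cassert>
#include <cstdint>

static double u32_to_double(uint32_t u) {
  // Signed conversion first (reinterpret the bits as i32, then convert).
  double d = static_cast<double>(static_cast<int32_t>(u));
  // If the sign bit was set, the signed value is 2^32 too small: adjust.
  if (static_cast<int32_t>(u) < 0)
    d += 4294967296.0;   // 2^32
  return d;
}

int main() {
  assert(u32_to_double(0x80000000u) == 2147483648.0);
  assert(u32_to_double(7u) == 7.0);
  return 0;
}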
SDValue SignedConv = DAG.getNode(ISD::SINT_TO_FP, dl, DstVT, Op); @@ -2846,7 +2851,7 @@ SDValue DAGTypeLegalizer::ExpandIntOp_UINT_TO_FP(SDNode *N) { RTLIB::Libcall LC = RTLIB::getUINTTOFP(SrcVT, DstVT); assert(LC != RTLIB::UNKNOWN_LIBCALL && "Don't know how to expand this UINT_TO_FP!"); - return MakeLibCall(LC, DstVT, &Op, 1, true, dl); + return TLI.makeLibCall(DAG, LC, DstVT, &Op, 1, true, dl); } SDValue DAGTypeLegalizer::ExpandIntOp_ATOMIC_STORE(SDNode *N) { diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 644e36e35e21..b6436bf42741 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -14,9 +14,9 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/CallingConv.h" -#include "llvm/DataLayout.h" #include "llvm/ADT/SetVector.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -735,6 +735,9 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { SDValue &OpEntry = PromotedIntegers[Op]; assert(OpEntry.getNode() == 0 && "Node is already promoted!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { @@ -746,6 +749,9 @@ void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { SDValue &OpEntry = SoftenedFloats[Op]; assert(OpEntry.getNode() == 0 && "Node is already converted to integer!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { @@ -760,6 +766,9 @@ void DAGTypeLegalizer::SetScalarizedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = ScalarizedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node is already scalarized!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedInteger(SDValue Op, SDValue &Lo, @@ -787,6 +796,10 @@ void DAGTypeLegalizer::SetExpandedInteger(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetExpandedFloat(SDValue Op, SDValue &Lo, @@ -814,6 +827,10 @@ void DAGTypeLegalizer::SetExpandedFloat(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already expanded"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::GetSplitVector(SDValue Op, SDValue &Lo, @@ -843,6 +860,10 @@ void DAGTypeLegalizer::SetSplitVector(SDValue Op, SDValue Lo, assert(Entry.first.getNode() == 0 && "Node already split"); Entry.first = Lo; Entry.second = Hi; + + // Propagate ordering + DAG.AssignOrdering(Lo.getNode(), DAG.GetOrdering(Op.getNode())); + DAG.AssignOrdering(Hi.getNode(), DAG.GetOrdering(Op.getNode())); } void DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { @@ -854,6 +875,9 @@ void 
DAGTypeLegalizer::SetWidenedVector(SDValue Op, SDValue Result) { SDValue &OpEntry = WidenedVectors[Op]; assert(OpEntry.getNode() == 0 && "Node already widened!"); OpEntry = Result; + + // Propagate node ordering + DAG.AssignOrdering(Result.getNode(), DAG.GetOrdering(Op.getNode())); } @@ -919,8 +943,11 @@ bool DAGTypeLegalizer::CustomLowerNode(SDNode *N, EVT VT, bool LegalizeResult) { // Make everything that once used N's values now use those in Results instead. assert(Results.size() == N->getNumValues() && "Custom lowering returned the wrong number of results!"); - for (unsigned i = 0, e = Results.size(); i != e; ++i) + for (unsigned i = 0, e = Results.size(); i != e; ++i) { ReplaceValueWith(SDValue(N, i), Results[i]); + // Propagate node ordering + DAG.AssignOrdering(Results[i].getNode(), DAG.GetOrdering(N)); + } return true; } @@ -1020,50 +1047,20 @@ SDValue DAGTypeLegalizer::LibCallify(RTLIB::Libcall LC, SDNode *N, unsigned NumOps = N->getNumOperands(); DebugLoc dl = N->getDebugLoc(); if (NumOps == 0) { - return MakeLibCall(LC, N->getValueType(0), 0, 0, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), 0, 0, isSigned, dl); } else if (NumOps == 1) { SDValue Op = N->getOperand(0); - return MakeLibCall(LC, N->getValueType(0), &Op, 1, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), &Op, 1, isSigned, dl); } else if (NumOps == 2) { SDValue Ops[2] = { N->getOperand(0), N->getOperand(1) }; - return MakeLibCall(LC, N->getValueType(0), Ops, 2, isSigned, dl); + return TLI.makeLibCall(DAG, LC, N->getValueType(0), Ops, 2, isSigned, dl); } SmallVector Ops(NumOps); for (unsigned i = 0; i < NumOps; ++i) Ops[i] = N->getOperand(i); - return MakeLibCall(LC, N->getValueType(0), &Ops[0], NumOps, isSigned, dl); -} - -/// MakeLibCall - Generate a libcall taking the given operands as arguments and -/// returning a result of type RetVT. -SDValue DAGTypeLegalizer::MakeLibCall(RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, - bool isSigned, DebugLoc dl) { - TargetLowering::ArgListTy Args; - Args.reserve(NumOps); - - TargetLowering::ArgListEntry Entry; - for (unsigned i = 0; i != NumOps; ++i) { - Entry.Node = Ops[i]; - Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); - Entry.isSExt = isSigned; - Entry.isZExt = !isSigned; - Args.push_back(Entry); - } - SDValue Callee = DAG.getExternalSymbol(TLI.getLibcallName(LC), - TLI.getPointerTy()); - - Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); - TargetLowering:: - CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, - false, 0, TLI.getLibcallCallingConv(LC), - /*isTailCall=*/false, - /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, - Callee, Args, DAG, dl); - std::pair CallInfo = TLI.LowerCallTo(CLI); - - return CallInfo.first; + return TLI.makeLibCall(DAG, LC, N->getValueType(0), + &Ops[0], NumOps, isSigned, dl); } // ExpandChainLibCall - Expand a node into a call to a libcall. 
Similar to diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 20b7ce6b15ba..54ea926241cf 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -17,12 +17,12 @@ #define SELECTIONDAG_LEGALIZETYPES_H #define DEBUG_TYPE "legalize-types" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/Target/TargetLowering.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/DenseSet.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetLowering.h" namespace llvm { @@ -80,35 +80,35 @@ private: /// PromotedIntegers - For integer nodes that are below legal width, this map /// indicates what promoted value to use. - DenseMap PromotedIntegers; + SmallDenseMap PromotedIntegers; /// ExpandedIntegers - For integer nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. - DenseMap > ExpandedIntegers; + SmallDenseMap, 8> ExpandedIntegers; /// SoftenedFloats - For floating point nodes converted to integers of /// the same size, this map indicates the converted value to use. - DenseMap SoftenedFloats; + SmallDenseMap SoftenedFloats; /// ExpandedFloats - For float nodes that need to be expanded this map /// indicates which operands are the expanded version of the input. - DenseMap > ExpandedFloats; + SmallDenseMap, 8> ExpandedFloats; /// ScalarizedVectors - For nodes that are <1 x ty>, this map indicates the /// scalar value of type 'ty' to use. - DenseMap ScalarizedVectors; + SmallDenseMap ScalarizedVectors; /// SplitVectors - For nodes that need to be split this map indicates /// which operands are the expanded version of the input. - DenseMap > SplitVectors; + SmallDenseMap, 8> SplitVectors; /// WidenedVectors - For vector nodes that need to be widened, indicates /// the widened value to use. - DenseMap WidenedVectors; + SmallDenseMap WidenedVectors; /// ReplacedValues - For values that have been replaced with another, /// indicates the replacement value to use. - DenseMap ReplacedValues; + SmallDenseMap ReplacedValues; /// Worklist - This defines a worklist of nodes to process. 
In order to be /// pushed onto this worklist, all operands of a node must have already been @@ -159,9 +159,6 @@ private: SDValue GetVectorElementPointer(SDValue VecPtr, EVT EltVT, SDValue Index); SDValue JoinIntegers(SDValue Lo, SDValue Hi); SDValue LibCallify(RTLIB::Libcall LC, SDNode *N, bool isSigned); - SDValue MakeLibCall(RTLIB::Libcall LC, EVT RetVT, - const SDValue *Ops, unsigned NumOps, bool isSigned, - DebugLoc dl); std::pair ExpandChainLibCall(RTLIB::Libcall LC, SDNode *Node, bool isSigned); @@ -433,9 +430,6 @@ private: SDValue SoftenFloatOp_SETCC(SDNode *N); SDValue SoftenFloatOp_STORE(SDNode *N, unsigned OpNo); - void SoftenSetCCOperands(SDValue &NewLHS, SDValue &NewRHS, - ISD::CondCode &CCCode, DebugLoc dl); - //===--------------------------------------------------------------------===// // Float Expansion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// @@ -471,6 +465,7 @@ private: void ExpandFloatRes_FP_EXTEND (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOW (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FPOWI (SDNode *N, SDValue &Lo, SDValue &Hi); + void ExpandFloatRes_FREM (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FRINT (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSIN (SDNode *N, SDValue &Lo, SDValue &Hi); void ExpandFloatRes_FSQRT (SDNode *N, SDValue &Lo, SDValue &Hi); @@ -536,6 +531,7 @@ private: // Vector Operand Scalarization: <1 x ty> -> ty. bool ScalarizeVectorOperand(SDNode *N, unsigned OpNo); SDValue ScalarizeVecOp_BITCAST(SDNode *N); + SDValue ScalarizeVecOp_EXTEND(SDNode *N); SDValue ScalarizeVecOp_CONCAT_VECTORS(SDNode *N); SDValue ScalarizeVecOp_EXTRACT_VECTOR_ELT(SDNode *N); SDValue ScalarizeVecOp_STORE(StoreSDNode *N, unsigned OpNo); @@ -578,6 +574,7 @@ private: // Vector Operand Splitting: <128 x ty> -> 2 x <64 x ty>. bool SplitVectorOperand(SDNode *N, unsigned OpNo); + SDValue SplitVecOp_VSELECT(SDNode *N, unsigned OpNo); SDValue SplitVecOp_UnaryOp(SDNode *N); SDValue SplitVecOp_BITCAST(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index 6bcb3b25e98e..222d1c043a63 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -20,7 +20,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" using namespace llvm; //===----------------------------------------------------------------------===// diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 22f8d51ab2a9..c6e066e2709b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -40,7 +40,7 @@ class VectorLegalizer { /// LegalizedNodes - For nodes that are of legal width, and that have more /// than one use, this map indicates what regularized operand to use. This /// allows us to avoid legalizing the same thing more than once. - DenseMap LegalizedNodes; + SmallDenseMap LegalizedNodes; // Adds a node to the translation cache void AddLegalizedOperand(SDValue From, SDValue To) { @@ -61,6 +61,8 @@ class VectorLegalizer { // Implements expansion for UINT_TO_FLOAT; falls back to UnrollVectorOp if // SINT_TO_FLOAT and SHR on vectors isn't legal. 
SDValue ExpandUINT_TO_FLOAT(SDValue Op); + // Implement expansion for SIGN_EXTEND_INREG using SRL and SRA. + SDValue ExpandSEXTINREG(SDValue Op); // Implement vselect in terms of XOR, AND, OR when blend is not supported // by the target. SDValue ExpandVSELECT(SDValue Op); @@ -83,6 +85,25 @@ class VectorLegalizer { }; bool VectorLegalizer::Run() { + // Before we start legalizing vector nodes, check if there are any vectors. + bool HasVectors = false; + for (SelectionDAG::allnodes_iterator I = DAG.allnodes_begin(), + E = prior(DAG.allnodes_end()); I != llvm::next(E); ++I) { + // Check if the values of the nodes contain vectors. We don't need to check + // the operands because we are going to check their values at some point. + for (SDNode::value_iterator J = I->value_begin(), E = I->value_end(); + J != E; ++J) + HasVectors |= J->isVector(); + + // If we found a vector node we can start the legalization. + if (HasVectors) + break; + } + + // If this basic block has no vectors then no need to legalize vectors. + if (!HasVectors) + return false; + // The legalize process is inherently a bottom-up recursive process (users // legalize their uses before themselves). Given infinite stack space, we // could just start legalizing on the root and traverse the whole graph. In @@ -142,9 +163,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { } else if (Op.getOpcode() == ISD::STORE) { StoreSDNode *ST = cast(Op.getNode()); EVT StVT = ST->getMemoryVT(); - EVT ValVT = ST->getValue().getValueType(); + MVT ValVT = ST->getValue().getSimpleValueType(); if (StVT.isVector() && ST->isTruncatingStore()) - switch (TLI.getTruncStoreAction(ValVT, StVT)) { + switch (TLI.getTruncStoreAction(ValVT, StVT.getSimpleVT())) { default: llvm_unreachable("This action is not supported yet!"); case TargetLowering::Legal: return TranslateLegalizeResults(Op, Result); @@ -221,6 +242,8 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::FRINT: case ISD::FNEARBYINT: case ISD::FFLOOR: + case ISD::FP_ROUND: + case ISD::FP_EXTEND: case ISD::FMA: case ISD::SIGN_EXTEND_INREG: QueryType = Node->getValueType(0); @@ -260,7 +283,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { // FALL THROUGH } case TargetLowering::Expand: - if (Node->getOpcode() == ISD::VSELECT) + if (Node->getOpcode() == ISD::SIGN_EXTEND_INREG) + Result = ExpandSEXTINREG(Op); + else if (Node->getOpcode() == ISD::VSELECT) Result = ExpandVSELECT(Op); else if (Node->getOpcode() == ISD::SELECT) Result = ExpandSELECT(Op); @@ -291,10 +316,10 @@ SDValue VectorLegalizer::PromoteVectorOp(SDValue Op) { // Vector "promotion" is basically just bitcasting and doing the operation // in a different type. For example, x86 promotes ISD::AND on v2i32 to // v1i64. 
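A plain-data illustration of the promotion-by-bitcast idea in the comment above: because AND operates independently on every bit, a v2i32 AND can be performed as one i64 AND after reinterpreting the bytes, then cast back. No LLVM types here; memcpy plays the role of ISD::BITCAST:

#include <cassert>
#include <cstdint>
#include <cstring>

int main() {
  uint32_t a[2] = {0xFFFF0000u, 0x0000FFFFu};
  uint32_t b[2] = {0x12345678u, 0x9ABCDEF0u};

  uint64_t wa, wb;
  std::memcpy(&wa, a, sizeof wa);          // "bitcast" v2i32 -> i64
  std::memcpy(&wb, b, sizeof wb);
  uint64_t wr = wa & wb;                   // the promoted operation

  uint32_t r[2];
  std::memcpy(r, &wr, sizeof r);           // bitcast back to v2i32
  assert(r[0] == (a[0] & b[0]) && r[1] == (a[1] & b[1]));
  return 0;
}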
- EVT VT = Op.getValueType(); + MVT VT = Op.getSimpleValueType(); assert(Op.getNode()->getNumValues() == 1 && "Can't promote a vector with multiple results!"); - EVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); + MVT NVT = TLI.getTypeToPromoteTo(Op.getOpcode(), VT); DebugLoc dl = Op.getDebugLoc(); SmallVector Operands(Op.getNumOperands()); @@ -357,30 +382,135 @@ SDValue VectorLegalizer::ExpandLoad(SDValue Op) { EVT SrcVT = LD->getMemoryVT(); ISD::LoadExtType ExtType = LD->getExtensionType(); - SmallVector LoadVals; + SmallVector Vals; SmallVector LoadChains; unsigned NumElem = SrcVT.getVectorNumElements(); - unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; - for (unsigned Idx=0; IdxgetValueType(0).getScalarType(), - Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), - SrcVT.getScalarType(), - LD->isVolatile(), LD->isNonTemporal(), - LD->getAlignment()); + EVT SrcEltVT = SrcVT.getScalarType(); + EVT DstEltVT = Op.getNode()->getValueType(0).getScalarType(); + + if (SrcVT.getVectorNumElements() > 1 && !SrcEltVT.isByteSized()) { + // When elements in a vector is not byte-addressable, we cannot directly + // load each element by advancing pointer, which could only address bytes. + // Instead, we load all significant words, mask bits off, and concatenate + // them to form each element. Finally, they are extended to destination + // scalar type to build the destination vector. + EVT WideVT = TLI.getPointerTy(); + + assert(WideVT.isRound() && + "Could not handle the sophisticated case when the widest integer is" + " not power of 2."); + assert(WideVT.bitsGE(SrcEltVT) && + "Type is not legalized?"); + + unsigned WideBytes = WideVT.getStoreSize(); + unsigned Offset = 0; + unsigned RemainingBytes = SrcVT.getStoreSize(); + SmallVector LoadVals; + + while (RemainingBytes > 0) { + SDValue ScalarLoad; + unsigned LoadBytes = WideBytes; + + if (RemainingBytes >= LoadBytes) { + ScalarLoad = DAG.getLoad(WideVT, dl, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LD->isVolatile(), LD->isNonTemporal(), + LD->isInvariant(), LD->getAlignment()); + } else { + EVT LoadVT = WideVT; + while (RemainingBytes < LoadBytes) { + LoadBytes >>= 1; // Reduce the load size by half. + LoadVT = EVT::getIntegerVT(*DAG.getContext(), LoadBytes << 3); + } + ScalarLoad = DAG.getExtLoad(ISD::EXTLOAD, dl, WideVT, Chain, BasePTR, + LD->getPointerInfo().getWithOffset(Offset), + LoadVT, LD->isVolatile(), + LD->isNonTemporal(), LD->getAlignment()); + } - BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, - DAG.getIntPtrConstant(Stride)); + RemainingBytes -= LoadBytes; + Offset += LoadBytes; + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(LoadBytes)); + + LoadVals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } + + // Extract bits, pack and extend/trunc them into destination type. 
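The extraction step named in the comment above, reduced to scalars: once a wide word has been loaded, each sub-byte element is recovered with a right shift and a mask of (1 << SrcEltBits) - 1, the same SRL plus AND pair (SrcEltBitMask) emitted in the code that follows. The packed value below is arbitrary test data:

#include <cassert>
#include <cstdint>

int main() {
  const unsigned EltBits = 4;              // e.g. a <8 x i4> packed into 32 bits
  uint32_t word = 0x87654321u;             // element i occupies bits [4*i, 4*i+3]
  uint32_t mask = (1u << EltBits) - 1;     // the SrcEltBitMask analogue

  assert(((word >> (0 * EltBits)) & mask) == 0x1);  // element 0
  assert(((word >> (2 * EltBits)) & mask) == 0x3);  // element 2
  assert(((word >> (5 * EltBits)) & mask) == 0x6);  // element 5
  return 0;
}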
+ unsigned SrcEltBits = SrcEltVT.getSizeInBits(); + SDValue SrcEltBitMask = DAG.getConstant((1U << SrcEltBits) - 1, WideVT); + + unsigned BitOffset = 0; + unsigned WideIdx = 0; + unsigned WideBits = WideVT.getSizeInBits(); + + for (unsigned Idx = 0; Idx != NumElem; ++Idx) { + SDValue Lo, Hi, ShAmt; + + if (BitOffset < WideBits) { + ShAmt = DAG.getConstant(BitOffset, TLI.getShiftAmountTy(WideVT)); + Lo = DAG.getNode(ISD::SRL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Lo = DAG.getNode(ISD::AND, dl, WideVT, Lo, SrcEltBitMask); + } - LoadVals.push_back(ScalarLoad.getValue(0)); - LoadChains.push_back(ScalarLoad.getValue(1)); + BitOffset += SrcEltBits; + if (BitOffset >= WideBits) { + WideIdx++; + Offset -= WideBits; + if (Offset > 0) { + ShAmt = DAG.getConstant(SrcEltBits - Offset, + TLI.getShiftAmountTy(WideVT)); + Hi = DAG.getNode(ISD::SHL, dl, WideVT, LoadVals[WideIdx], ShAmt); + Hi = DAG.getNode(ISD::AND, dl, WideVT, Hi, SrcEltBitMask); + } + } + + if (Hi.getNode()) + Lo = DAG.getNode(ISD::OR, dl, WideVT, Lo, Hi); + + switch (ExtType) { + default: llvm_unreachable("Unknown extended-load op!"); + case ISD::EXTLOAD: + Lo = DAG.getAnyExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::ZEXTLOAD: + Lo = DAG.getZExtOrTrunc(Lo, dl, DstEltVT); + break; + case ISD::SEXTLOAD: + ShAmt = DAG.getConstant(WideBits - SrcEltBits, + TLI.getShiftAmountTy(WideVT)); + Lo = DAG.getNode(ISD::SHL, dl, WideVT, Lo, ShAmt); + Lo = DAG.getNode(ISD::SRA, dl, WideVT, Lo, ShAmt); + Lo = DAG.getSExtOrTrunc(Lo, dl, DstEltVT); + break; + } + Vals.push_back(Lo); + } + } else { + unsigned Stride = SrcVT.getScalarType().getSizeInBits()/8; + + for (unsigned Idx=0; IdxgetValueType(0).getScalarType(), + Chain, BasePTR, LD->getPointerInfo().getWithOffset(Idx * Stride), + SrcVT.getScalarType(), + LD->isVolatile(), LD->isNonTemporal(), + LD->getAlignment()); + + BasePTR = DAG.getNode(ISD::ADD, dl, BasePTR.getValueType(), BasePTR, + DAG.getIntPtrConstant(Stride)); + + Vals.push_back(ScalarLoad.getValue(0)); + LoadChains.push_back(ScalarLoad.getValue(1)); + } } SDValue NewChain = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, &LoadChains[0], LoadChains.size()); SDValue Value = DAG.getNode(ISD::BUILD_VECTOR, dl, - Op.getNode()->getValueType(0), &LoadVals[0], LoadVals.size()); + Op.getNode()->getValueType(0), &Vals[0], Vals.size()); AddLegalizedOperand(Op.getValue(0), Value); AddLegalizedOperand(Op.getValue(1), NewChain); @@ -499,6 +629,26 @@ SDValue VectorLegalizer::ExpandSELECT(SDValue Op) { return DAG.getNode(ISD::BITCAST, DL, Op.getValueType(), Val); } +SDValue VectorLegalizer::ExpandSEXTINREG(SDValue Op) { + EVT VT = Op.getValueType(); + + // Make sure that the SRA and SHL instructions are available. + if (TLI.getOperationAction(ISD::SRA, VT) == TargetLowering::Expand || + TLI.getOperationAction(ISD::SHL, VT) == TargetLowering::Expand) + return DAG.UnrollVectorOp(Op.getNode()); + + DebugLoc DL = Op.getDebugLoc(); + EVT OrigTy = cast(Op->getOperand(1))->getVT(); + + unsigned BW = VT.getScalarType().getSizeInBits(); + unsigned OrigBW = OrigTy.getScalarType().getSizeInBits(); + SDValue ShiftSz = DAG.getConstant(BW - OrigBW, VT); + + Op = Op.getOperand(0); + Op = DAG.getNode(ISD::SHL, DL, VT, Op, ShiftSz); + return DAG.getNode(ISD::SRA, DL, VT, Op, ShiftSz); +} + SDValue VectorLegalizer::ExpandVSELECT(SDValue Op) { // Implement VSELECT in terms of XOR, AND, OR // on platforms which do not support blend natively. 
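The expansion named in the comment above is the classic bitwise select: with an all-ones or all-zeros per-lane mask, select(mask, a, b) equals (a & mask) | (b & ~mask), so only AND, OR and a NOT/XOR are needed. Standalone sketch; bitselect is an illustrative helper:

#include <cassert>
#include <cstdint>

static uint32_t bitselect(uint32_t mask, uint32_t a, uint32_t b) {
  return (a & mask) | (b & ~mask);   // mask bit set -> take a, else take b
}

int main() {
  // Two 16-bit lanes packed in 32 bits; the low lane is selected from a,
  // the high lane from b.
  uint32_t mask = 0x0000FFFFu;
  assert(bitselect(mask, 0x1111AAAAu, 0x2222BBBBu) == 0x2222AAAAu);
  return 0;
}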
diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp index d51a6eb192ee..5ec853563888 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorTypes.cpp @@ -21,7 +21,7 @@ //===----------------------------------------------------------------------===// #include "LegalizeTypes.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -365,6 +365,11 @@ bool DAGTypeLegalizer::ScalarizeVectorOperand(SDNode *N, unsigned OpNo) { case ISD::BITCAST: Res = ScalarizeVecOp_BITCAST(N); break; + case ISD::ANY_EXTEND: + case ISD::ZERO_EXTEND: + case ISD::SIGN_EXTEND: + Res = ScalarizeVecOp_EXTEND(N); + break; case ISD::CONCAT_VECTORS: Res = ScalarizeVecOp_CONCAT_VECTORS(N); break; @@ -400,6 +405,21 @@ SDValue DAGTypeLegalizer::ScalarizeVecOp_BITCAST(SDNode *N) { N->getValueType(0), Elt); } +/// ScalarizeVecOp_EXTEND - If the value to extend is a vector that needs +/// to be scalarized, it must be <1 x ty>. Extend the element instead. +SDValue DAGTypeLegalizer::ScalarizeVecOp_EXTEND(SDNode *N) { + assert(N->getValueType(0).getVectorNumElements() == 1 && + "Unexected vector type!"); + SDValue Elt = GetScalarizedVector(N->getOperand(0)); + SmallVector Ops(1); + Ops[0] = DAG.getNode(N->getOpcode(), N->getDebugLoc(), + N->getValueType(0).getScalarType(), Elt); + // Revectorize the result so the types line up with what the uses of this + // expression expect. + return DAG.getNode(ISD::BUILD_VECTOR, N->getDebugLoc(), N->getValueType(0), + &Ops[0], 1); +} + /// ScalarizeVecOp_CONCAT_VECTORS - The vectors to concatenate have length one - /// use a BUILD_VECTOR instead. SDValue DAGTypeLegalizer::ScalarizeVecOp_CONCAT_VECTORS(SDNode *N) { @@ -1030,7 +1050,9 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { case ISD::STORE: Res = SplitVecOp_STORE(cast(N), OpNo); break; - + case ISD::VSELECT: + Res = SplitVecOp_VSELECT(N, OpNo); + break; case ISD::CTTZ: case ISD::CTLZ: case ISD::CTPOP: @@ -1064,6 +1086,58 @@ bool DAGTypeLegalizer::SplitVectorOperand(SDNode *N, unsigned OpNo) { return false; } +SDValue DAGTypeLegalizer::SplitVecOp_VSELECT(SDNode *N, unsigned OpNo) { + // The only possibility for an illegal operand is the mask, since result type + // legalization would have handled this node already otherwise. 
+ assert(OpNo == 0 && "Illegal operand must be mask"); + + SDValue Mask = N->getOperand(0); + SDValue Src0 = N->getOperand(1); + SDValue Src1 = N->getOperand(2); + DebugLoc DL = N->getDebugLoc(); + EVT MaskVT = Mask.getValueType(); + assert(MaskVT.isVector() && "VSELECT without a vector mask?"); + + SDValue Lo, Hi; + GetSplitVector(N->getOperand(0), Lo, Hi); + assert(Lo.getValueType() == Hi.getValueType() && + "Lo and Hi have differing types");; + + unsigned LoNumElts = Lo.getValueType().getVectorNumElements(); + unsigned HiNumElts = Hi.getValueType().getVectorNumElements(); + assert(LoNumElts == HiNumElts && "Asymmetric vector split?"); + + LLVMContext &Ctx = *DAG.getContext(); + SDValue Zero = DAG.getIntPtrConstant(0); + SDValue LoElts = DAG.getIntPtrConstant(LoNumElts); + EVT Src0VT = Src0.getValueType(); + EVT Src0EltTy = Src0VT.getVectorElementType(); + EVT MaskEltTy = MaskVT.getVectorElementType(); + + EVT LoOpVT = EVT::getVectorVT(Ctx, Src0EltTy, LoNumElts); + EVT LoMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, LoNumElts); + EVT HiOpVT = EVT::getVectorVT(Ctx, Src0EltTy, HiNumElts); + EVT HiMaskVT = EVT::getVectorVT(Ctx, MaskEltTy, HiNumElts); + + SDValue LoOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src0, Zero); + SDValue LoOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoOpVT, Src1, Zero); + + SDValue HiOp0 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src0, LoElts); + SDValue HiOp1 = DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiOpVT, Src1, LoElts); + + SDValue LoMask = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, LoMaskVT, Mask, Zero); + SDValue HiMask = + DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, HiMaskVT, Mask, LoElts); + + SDValue LoSelect = + DAG.getNode(ISD::VSELECT, DL, LoOpVT, LoMask, LoOp0, LoOp1); + SDValue HiSelect = + DAG.getNode(ISD::VSELECT, DL, HiOpVT, HiMask, HiOp0, HiOp1); + + return DAG.getNode(ISD::CONCAT_VECTORS, DL, Src0VT, LoSelect, HiSelect); +} + SDValue DAGTypeLegalizer::SplitVecOp_UnaryOp(SDNode *N) { // The result has a legal vector type, but the input needs splitting. 
EVT ResVT = N->getValueType(0); diff --git a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp index c3794d5f7863..473e1384e399 100644 --- a/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp +++ b/lib/CodeGen/SelectionDAG/ResourcePriorityQueue.cpp @@ -21,13 +21,13 @@ #define DEBUG_TYPE "scheduler" #include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/SelectionDAGNodes.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -94,9 +94,9 @@ ResourcePriorityQueue::numberRCValPredInSU(SUnit *SU, unsigned RCId) { continue; for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { - EVT VT = ScegN->getValueType(i); + MVT VT = ScegN->getSimpleValueType(i); if (TLI->isTypeLegal(VT) - && (TLI->getRegClassFor(VT)->getID() == RCId)) { + && (TLI->getRegClassFor(VT)->getID() == RCId)) { NumberDeps++; break; } @@ -132,9 +132,9 @@ unsigned ResourcePriorityQueue::numberRCValSuccInSU(SUnit *SU, for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { const SDValue &Op = ScegN->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (TLI->isTypeLegal(VT) - && (TLI->getRegClassFor(VT)->getID() == RCId)) { + && (TLI->getRegClassFor(VT)->getID() == RCId)) { NumberDeps++; break; } @@ -332,7 +332,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { // Gen estimate. for (unsigned i = 0, e = SU->getNode()->getNumValues(); i != e; ++i) { - EVT VT = SU->getNode()->getValueType(i); + MVT VT = SU->getNode()->getSimpleValueType(i); if (TLI->isTypeLegal(VT) && TLI->getRegClassFor(VT) && TLI->getRegClassFor(VT)->getID() == RCId) @@ -341,7 +341,7 @@ signed ResourcePriorityQueue::rawRegPressureDelta(SUnit *SU, unsigned RCId) { // Kill estimate. for (unsigned i = 0, e = SU->getNode()->getNumOperands(); i != e; ++i) { const SDValue &Op = SU->getNode()->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (isa(Op.getNode())) continue; @@ -485,7 +485,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { if (ScegN->isMachineOpcode()) { // Estimate generated regs. for (unsigned i = 0, e = ScegN->getNumValues(); i != e; ++i) { - EVT VT = ScegN->getValueType(i); + MVT VT = ScegN->getSimpleValueType(i); if (TLI->isTypeLegal(VT)) { const TargetRegisterClass *RC = TLI->getRegClassFor(VT); @@ -496,7 +496,7 @@ void ResourcePriorityQueue::scheduledNode(SUnit *SU) { // Estimate killed regs. 
for (unsigned i = 0, e = ScegN->getNumOperands(); i != e; ++i) { const SDValue &Op = ScegN->getOperand(i); - EVT VT = Op.getNode()->getValueType(Op.getResNo()); + MVT VT = Op.getNode()->getSimpleValueType(Op.getResNo()); if (TLI->isTypeLegal(VT)) { const TargetRegisterClass *RC = TLI->getRegClassFor(VT); @@ -604,10 +604,8 @@ SUnit *ResourcePriorityQueue::pop() { std::vector::iterator Best = Queue.begin(); if (!DisableDFASched) { signed BestCost = SUSchedulingCost(*Best); - for (std::vector::iterator I = Queue.begin(), + for (std::vector::iterator I = llvm::next(Queue.begin()), E = Queue.end(); I != E; ++I) { - if (*I == *Best) - continue; if (SUSchedulingCost(*I) > BestCost) { BestCost = SUSchedulingCost(*I); diff --git a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h index 2dcb22957325..4af7172847d7 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h +++ b/lib/CodeGen/SelectionDAG/SDNodeDbgValue.h @@ -15,8 +15,8 @@ #define LLVM_CODEGEN_SDNODEDBGVALUE_H #include "llvm/ADT/SmallVector.h" -#include "llvm/Support/DebugLoc.h" #include "llvm/Support/DataTypes.h" +#include "llvm/Support/DebugLoc.h" namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h index d2269f8accf1..7e7b8974be48 100644 --- a/lib/CodeGen/SelectionDAG/SDNodeOrdering.h +++ b/lib/CodeGen/SelectionDAG/SDNodeOrdering.h @@ -33,8 +33,10 @@ class SDNodeOrdering { public: SDNodeOrdering() {} - void add(const SDNode *Node, unsigned O) { - OrderMap[Node] = O; + void add(const SDNode *Node, unsigned NewOrder) { + unsigned &OldOrder = OrderMap[Node]; + if (OldOrder == 0 || (OldOrder > 0 && NewOrder < OldOrder)) + OldOrder = NewOrder; } void remove(const SDNode *Node) { DenseMap::iterator Itr = OrderMap.find(Node); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 2ecdd8941551..d1f36cb647dc 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -12,20 +12,20 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" -#include "ScheduleDAGSDNodes.h" -#include "InstrEmitter.h" -#include "llvm/InlineAsm.h" #include "llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Support/Debug.h" +#include "InstrEmitter.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; STATISTIC(NumUnfolds, "Number of nodes unfolded"); diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index c55456902c87..c009cfcc516d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -16,22 +16,23 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" -#include "ScheduleDAGSDNodes.h" -#include "llvm/InlineAsm.h" #include 
"llvm/CodeGen/SchedulerRegistry.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" +#include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/ScheduleHazardRecognizer.h" +#include "llvm/CodeGen/SelectionDAGISel.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/InlineAsm.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -142,6 +143,12 @@ private: std::vector LiveRegDefs; std::vector LiveRegGens; + // Collect interferences between physical register use/defs. + // Each interference is an SUnit and set of physical registers. + SmallVector Interferences; + typedef DenseMap > LRegsMapT; + LRegsMapT LRegsMap; + /// Topo - A topological ordering for SUnits which permits fast IsReachable /// and similar queries. ScheduleDAGTopologicalSort Topo; @@ -156,7 +163,7 @@ public: CodeGenOpt::Level OptLevel) : ScheduleDAGSDNodes(mf), NeedLatency(needlatency), AvailableQueue(availqueue), CurCycle(0), - Topo(SUnits) { + Topo(SUnits, NULL) { const TargetMachine &tm = mf.getTarget(); if (DisableSchedCycles || !NeedLatency) @@ -225,6 +232,8 @@ private: SmallVector&); bool DelayForLiveRegsBottomUp(SUnit*, SmallVector&); + void releaseInterferences(unsigned Reg = 0); + SUnit *PickNodeToScheduleBottomUp(); void ListScheduleBottomUp(); @@ -268,14 +277,23 @@ static void GetCostForDef(const ScheduleDAGSDNodes::RegDefIter &RegDefPos, const TargetRegisterInfo *TRI, unsigned &RegClass, unsigned &Cost, const MachineFunction &MF) { - EVT VT = RegDefPos.GetValue(); + MVT VT = RegDefPos.GetValue(); // Special handling for untyped values. These values can only come from // the expansion of custom DAG-to-DAG patterns. if (VT == MVT::Untyped) { const SDNode *Node = RegDefPos.GetNode(); - unsigned Opcode = Node->getMachineOpcode(); + // Special handling for CopyFromReg of untyped values. + if (!Node->isMachineOpcode() && Node->getOpcode() == ISD::CopyFromReg) { + unsigned Reg = cast(Node->getOperand(1))->getReg(); + const TargetRegisterClass *RC = MF.getRegInfo().getRegClass(Reg); + RegClass = RC->getID(); + Cost = 1; + return; + } + + unsigned Opcode = Node->getMachineOpcode(); if (Opcode == TargetOpcode::REG_SEQUENCE) { unsigned DstRCIdx = cast(Node->getOperand(0))->getZExtValue(); const TargetRegisterClass *RC = TRI->getRegClass(DstRCIdx); @@ -312,6 +330,7 @@ void ScheduleDAGRRList::Schedule() { LiveRegDefs.resize(TRI->getNumRegs() + 1, NULL); LiveRegGens.resize(TRI->getNumRegs() + 1, NULL); CallSeqEndForStart.clear(); + assert(Interferences.empty() && LRegsMap.empty() && "stale Interferences"); // Build the scheduling graph. 
BuildSchedGraph(NULL); @@ -725,6 +744,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } // Release the special call resource dependence, if this is the beginning @@ -739,6 +759,7 @@ void ScheduleDAGRRList::ScheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -794,6 +815,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[I->getReg()] = NULL; LiveRegGens[I->getReg()] = NULL; + releaseInterferences(I->getReg()); } } @@ -821,6 +843,7 @@ void ScheduleDAGRRList::UnscheduleNodeBottomUp(SUnit *SU) { --NumLiveRegs; LiveRegDefs[CallResource] = NULL; LiveRegGens[CallResource] = NULL; + releaseInterferences(CallResource); } } @@ -881,9 +904,6 @@ void ScheduleDAGRRList::BacktrackBottomUp(SUnit *SU, SUnit *BtSU) { SUnit *OldSU = Sequence.back(); while (true) { Sequence.pop_back(); - if (SU->isSucc(OldSU)) - // Don't try to remove SU from AvailableQueue. - SU->isAvailable = false; // FIXME: use ready cycle instead of height CurCycle = OldSU->getHeight(); UnscheduleNodeBottomUp(OldSU); @@ -1305,34 +1325,60 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVector &LRegs) { return !LRegs.empty(); } +void ScheduleDAGRRList::releaseInterferences(unsigned Reg) { + // Add the nodes that aren't ready back onto the available list. + for (unsigned i = Interferences.size(); i > 0; --i) { + SUnit *SU = Interferences[i-1]; + LRegsMapT::iterator LRegsPos = LRegsMap.find(SU); + if (Reg) { + SmallVector &LRegs = LRegsPos->second; + if (std::find(LRegs.begin(), LRegs.end(), Reg) == LRegs.end()) + continue; + } + SU->isPending = false; + // The interfering node may no longer be available due to backtracking. + // Furthermore, it may have been made available again, in which case it is + // now already in the AvailableQueue. + if (SU->isAvailable && !SU->NodeQueueId) { + DEBUG(dbgs() << " Repushing SU #" << SU->NodeNum << '\n'); + AvailableQueue->push(SU); + } + if (i < Interferences.size()) + Interferences[i-1] = Interferences.back(); + Interferences.pop_back(); + LRegsMap.erase(LRegsPos); + } +} + /// Return a node that can be scheduled in this cycle. Requirements: /// (1) Ready: latency has been satisfied /// (2) No Hazards: resources are available /// (3) No Interferences: may unschedule to break register interferences. SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { - SmallVector Interferences; - DenseMap > LRegsMap; - - SUnit *CurSU = AvailableQueue->pop(); + SUnit *CurSU = AvailableQueue->empty() ? 0 : AvailableQueue->pop(); while (CurSU) { SmallVector LRegs; if (!DelayForLiveRegsBottomUp(CurSU, LRegs)) break; - LRegsMap.insert(std::make_pair(CurSU, LRegs)); - - CurSU->isPending = true; // This SU is not in AvailableQueue right now. - Interferences.push_back(CurSU); + DEBUG(dbgs() << " Interfering reg " << + (LRegs[0] == TRI->getNumRegs() ? "CallResource" + : TRI->getName(LRegs[0])) + << " SU #" << CurSU->NodeNum << '\n'); + std::pair LRegsPair = + LRegsMap.insert(std::make_pair(CurSU, LRegs)); + if (LRegsPair.second) { + CurSU->isPending = true; // This SU is not in AvailableQueue right now. + Interferences.push_back(CurSU); + } + else { + assert(CurSU->isPending && "Intereferences are pending"); + // Update the interference with current live regs. 
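releaseInterferences, added above, walks the Interferences list backwards, re-queues only the units that were blocked on the just-freed register (or every unit when Reg is 0), and erases each released entry in O(1) by overwriting it with the last element before popping. A self-contained sketch of that filtered swap-and-pop removal, with plain ints standing in for SUnits, std::map for DenseMap, and a Released vector standing in for AvailableQueue (all of these names are illustrative, not patch code):

    #include <algorithm>
    #include <cassert>
    #include <map>
    #include <vector>

    using RegList = std::vector<unsigned>;

    // Release every blocked unit waiting on Reg (or all units when Reg == 0).
    // Iterating from the back makes the swap-and-pop erase safe: any element
    // swapped into slot i-1 was already examined at a higher index and kept.
    void releaseInterferences(std::vector<int> &Interferences,
                              std::map<int, RegList> &LRegsMap,
                              std::vector<int> &Released, unsigned Reg = 0) {
      for (unsigned i = Interferences.size(); i > 0; --i) {
        int SU = Interferences[i - 1];
        auto It = LRegsMap.find(SU);
        // Skip units that are blocked on some other register.
        if (Reg && std::find(It->second.begin(), It->second.end(), Reg) ==
                       It->second.end())
          continue;
        Released.push_back(SU);                        // stands in for AvailableQueue->push
        if (i < Interferences.size())
          Interferences[i - 1] = Interferences.back(); // unordered erase
        Interferences.pop_back();
        LRegsMap.erase(It);
      }
    }

    int main() {
      std::vector<int> Interferences = {1, 2, 3};
      std::map<int, RegList> LRegsMap = {{1, {7}}, {2, {8}}, {3, {7, 9}}};
      std::vector<int> Released;
      releaseInterferences(Interferences, LRegsMap, Released, /*Reg=*/7);
      // Units 1 and 3 were waiting on register 7 and get released; unit 2 stays.
      assert(Released.size() == 2 && Interferences.size() == 1 &&
             Interferences[0] == 2);
    }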
+ LRegsPair.first->second = LRegs; + } CurSU = AvailableQueue->pop(); } - if (CurSU) { - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - assert(Interferences[i]->isAvailable && "must still be available"); - AvailableQueue->push(Interferences[i]); - } + if (CurSU) return CurSU; - } // All candidates are delayed due to live physical reg dependencies. // Try backtracking, code duplication, or inserting cross class copies @@ -1353,6 +1399,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { } } if (!WillCreateCycle(TrySU, BtSU)) { + // BacktrackBottomUp mutates Interferences! BacktrackBottomUp(TrySU, BtSU); // Force the current node to be scheduled before the node that @@ -1362,19 +1409,19 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { if (!BtSU->isPending) AvailableQueue->remove(BtSU); } + DEBUG(dbgs() << "ARTIFICIAL edge from SU(" << BtSU->NodeNum << ") to SU(" + << TrySU->NodeNum << ")\n"); AddPred(TrySU, SDep(BtSU, SDep::Artificial)); // If one or more successors has been unscheduled, then the current - // node is no longer avaialable. Schedule a successor that's now - // available instead. - if (!TrySU->isAvailable) { + // node is no longer available. + if (!TrySU->isAvailable) CurSU = AvailableQueue->pop(); - } else { + AvailableQueue->remove(TrySU); CurSU = TrySU; - TrySU->isPending = false; - Interferences.erase(Interferences.begin()+i); } + // Interferences has been mutated. We must break. break; } } @@ -1425,17 +1472,7 @@ SUnit *ScheduleDAGRRList::PickNodeToScheduleBottomUp() { TrySU->isAvailable = false; CurSU = NewDef; } - assert(CurSU && "Unable to resolve live physical register dependencies!"); - - // Add the nodes that aren't ready back onto the available list. - for (unsigned i = 0, e = Interferences.size(); i != e; ++i) { - Interferences[i]->isPending = false; - // May no longer be available due to backtracking. - if (Interferences[i]->isAvailable) { - AvailableQueue->push(Interferences[i]); - } - } return CurSU; } @@ -1456,7 +1493,7 @@ void ScheduleDAGRRList::ListScheduleBottomUp() { // While Available queue is not empty, grab the node with the highest // priority. If it is not ready put it back. Schedule the node. 
Sequence.reserve(SUnits.size()); - while (!AvailableQueue->empty()) { + while (!AvailableQueue->empty() || !Interferences.empty()) { DEBUG(dbgs() << "\nExamining Available:\n"; AvailableQueue->dump(this)); @@ -1939,7 +1976,7 @@ bool RegReductionPQBase::MayReduceRegPressure(SUnit *SU) const { unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (!N->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); @@ -1973,7 +2010,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { } for (ScheduleDAGSDNodes::RegDefIter RegDefPos(PredSU, scheduleDAG); RegDefPos.IsValid(); RegDefPos.Advance()) { - EVT VT = RegDefPos.GetValue(); + MVT VT = RegDefPos.GetValue(); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); if (RegPressure[RCId] >= RegLimit[RCId]) ++PDiff; @@ -1986,7 +2023,7 @@ int RegReductionPQBase::RegPressureDiff(SUnit *SU, unsigned &LiveUses) const { unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (!N->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); @@ -2097,7 +2134,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { const SDNode *PN = PredSU->getNode(); if (!PN->isMachineOpcode()) { if (PN->getOpcode() == ISD::CopyFromReg) { - EVT VT = PN->getValueType(0); + MVT VT = PN->getSimpleValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); } @@ -2109,14 +2146,14 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (POpc == TargetOpcode::EXTRACT_SUBREG || POpc == TargetOpcode::INSERT_SUBREG || POpc == TargetOpcode::SUBREG_TO_REG) { - EVT VT = PN->getValueType(0); + MVT VT = PN->getSimpleValueType(0); unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); RegPressure[RCId] += TLI->getRepRegClassCostFor(VT); continue; } unsigned NumDefs = TII->get(PN->getMachineOpcode()).getNumDefs(); for (unsigned i = 0; i != NumDefs; ++i) { - EVT VT = PN->getValueType(i); + MVT VT = PN->getSimpleValueType(i); if (!PN->hasAnyUseOfValue(i)) continue; unsigned RCId = TLI->getRepRegClassFor(VT)->getID(); @@ -2133,7 +2170,7 @@ void RegReductionPQBase::unscheduledNode(SUnit *SU) { if (SU->NumSuccs && N->isMachineOpcode()) { unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); for (unsigned i = NumDefs, e = N->getNumValues(); i != e; ++i) { - EVT VT = N->getValueType(i); + MVT VT = N->getSimpleValueType(i); if (VT == MVT::Glue || VT == MVT::Other) continue; if (!N->hasAnyUseOfValue(i)) diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp index a197fcbfa593..b22440daf16d 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.cpp @@ -13,26 +13,26 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" -#include "SDNodeDbgValue.h" #include "ScheduleDAGSDNodes.h" #include "InstrEmitter.h" -#include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include 
"llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetSubtargetInfo.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/SelectionDAG.h" +#include "llvm/MC/MCInstrItineraries.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" using namespace llvm; STATISTIC(LoadsClustered, "Number of loads clustered together"); @@ -562,7 +562,7 @@ void ScheduleDAGSDNodes::RegDefIter::Advance() { for (;DefIdx < NodeNumDefs; ++DefIdx) { if (!Node->hasAnyUseOfValue(DefIdx)) continue; - ValueType = Node->getValueType(DefIdx); + ValueType = Node->getSimpleValueType(DefIdx); ++DefIdx; return; // Found a normal regdef. } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h index 907356fd212c..2ff37e0a15e1 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGSDNodes.h @@ -15,8 +15,8 @@ #ifndef SCHEDULEDAGSDNODES_H #define SCHEDULEDAGSDNODES_H +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/ScheduleDAG.h" -#include "llvm/CodeGen/SelectionDAG.h" namespace llvm { /// ScheduleDAGSDNodes - A ScheduleDAG for scheduling SDNode-based DAGs. @@ -135,13 +135,13 @@ namespace llvm { const SDNode *Node; unsigned DefIdx; unsigned NodeNumDefs; - EVT ValueType; + MVT ValueType; public: RegDefIter(const SUnit *SU, const ScheduleDAGSDNodes *SD); bool IsValid() const { return Node != NULL; } - EVT GetValue() const { + MVT GetValue() const { assert(IsValid() && "bad iterator"); return ValueType; } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp index 30f03ac737b9..58aa1fe0ebbe 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGVLIW.cpp @@ -19,19 +19,19 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "pre-RA-sched" +#include "llvm/CodeGen/SchedulerRegistry.h" #include "ScheduleDAGSDNodes.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LatencyPriorityQueue.h" +#include "llvm/CodeGen/ResourcePriorityQueue.h" #include "llvm/CodeGen/ScheduleHazardRecognizer.h" -#include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/IR/DataLayout.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/ResourcePriorityQueue.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -123,6 +123,8 @@ void ScheduleDAGVLIW::releaseSucc(SUnit *SU, const SDep &D) { llvm_unreachable(0); } #endif + assert(!D.isWeak() && "unexpected artificial DAG edge"); + --SuccSU->NumPredsLeft; SuccSU->setDepthToAtLeast(SU->getDepth() + D.getLatency()); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp 
b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index f000ce38d367..64244313a326 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -12,42 +12,43 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/SelectionDAG.h" -#include "SDNodeOrdering.h" #include "SDNodeDbgValue.h" -#include "llvm/CallingConv.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalAlias.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Intrinsics.h" +#include "SDNodeOrdering.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/Assembly/Writer.h" #include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetSelectionDAGInfo.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalAlias.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Intrinsics.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Mutex.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSelectionDAGInfo.h" #include #include using namespace llvm; @@ -59,18 +60,6 @@ static SDVTList makeVTList(const EVT *VTs, unsigned NumVTs) { return Res; } -static const fltSemantics *EVTToAPFloatSemantics(EVT VT) { - switch (VT.getSimpleVT().SimpleTy) { - default: llvm_unreachable("Unknown FP format"); - case MVT::f16: return &APFloat::IEEEhalf; - case MVT::f32: return &APFloat::IEEEsingle; - case MVT::f64: return &APFloat::IEEEdouble; - case MVT::f80: return &APFloat::x87DoubleExtended; - case MVT::f128: return &APFloat::IEEEquad; - case MVT::ppcf128: return &APFloat::PPCDoubleDouble; - } -} - // Default null implementations of the callbacks. void SelectionDAG::DAGUpdateListener::NodeDeleted(SDNode*, SDNode*) {} void SelectionDAG::DAGUpdateListener::NodeUpdated(SDNode*) {} @@ -94,7 +83,8 @@ bool ConstantFPSDNode::isValueValidForType(EVT VT, // convert modifies in place, so make a copy. 
APFloat Val2 = APFloat(Val); bool losesInfo; - (void) Val2.convert(*EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, + (void) Val2.convert(SelectionDAG::EVTToAPFloatSemantics(VT), + APFloat::rmNearestTiesToEven, &losesInfo); return !losesInfo; } @@ -884,15 +874,17 @@ unsigned SelectionDAG::getEVTAlignment(EVT VT) const { // EntryNode could meaningfully have debug info if we can find it... SelectionDAG::SelectionDAG(const TargetMachine &tm, CodeGenOpt::Level OL) : TM(tm), TLI(*tm.getTargetLowering()), TSI(*tm.getSelectionDAGInfo()), - OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), getVTList(MVT::Other)), + TTI(0), OptLevel(OL), EntryNode(ISD::EntryToken, DebugLoc(), + getVTList(MVT::Other)), Root(getEntryNode()), Ordering(0), UpdateListeners(0) { AllNodes.push_back(&EntryNode); Ordering = new SDNodeOrdering(); DbgInfo = new SDDbgInfo(); } -void SelectionDAG::init(MachineFunction &mf) { +void SelectionDAG::init(MachineFunction &mf, const TargetTransformInfo *tti) { MF = &mf; + TTI = tti; Context = &mf.getFunction()->getContext(); } @@ -1074,10 +1066,11 @@ SDValue SelectionDAG::getConstantFP(double Val, EVT VT, bool isTarget) { return getConstantFP(APFloat((float)Val), VT, isTarget); else if (EltVT==MVT::f64) return getConstantFP(APFloat(Val), VT, isTarget); - else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::f16) { + else if (EltVT==MVT::f80 || EltVT==MVT::f128 || EltVT==MVT::ppcf128 || + EltVT==MVT::f16) { bool ignored; APFloat apf = APFloat(Val); - apf.convert(*EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, + apf.convert(EVTToAPFloatSemantics(EltVT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(apf, VT, isTarget); } else @@ -1525,7 +1518,7 @@ SDValue SelectionDAG::getMDNode(const MDNode *MD) { /// the target's desired shift amount type. SDValue SelectionDAG::getShiftAmountOperand(EVT LHSTy, SDValue Op) { EVT OpTy = Op.getValueType(); - MVT ShTy = TLI.getShiftAmountTy(LHSTy); + EVT ShTy = TLI.getShiftAmountTy(LHSTy); if (OpTy == ShTy || OpTy.isVector()) return Op; ISD::NodeType Opcode = OpTy.bitsGT(ShTy) ? ISD::TRUNCATE : ISD::ZERO_EXTEND; @@ -1924,7 +1917,8 @@ void SelectionDAG::ComputeMaskedBits(SDValue Op, APInt &KnownZero, } case ISD::LOAD: { LoadSDNode *LD = cast(Op); - if (ISD::isZEXTLoad(Op.getNode())) { + // If this is a ZEXTLoad and we are looking at the loaded value. + if (ISD::isZEXTLoad(Op.getNode()) && Op.getResNo() == 0) { EVT VT = LD->getMemoryVT(); unsigned MemBits = VT.getScalarType().getSizeInBits(); KnownZero |= APInt::getHighBitsSet(BitWidth, BitWidth - MemBits); @@ -2294,17 +2288,20 @@ unsigned SelectionDAG::ComputeNumSignBits(SDValue Op, unsigned Depth) const{ break; } - // Handle LOADX separately here. EXTLOAD case will fallthrough. - if (LoadSDNode *LD = dyn_cast(Op)) { - unsigned ExtType = LD->getExtensionType(); - switch (ExtType) { - default: break; - case ISD::SEXTLOAD: // '17' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp+1; - case ISD::ZEXTLOAD: // '16' bits known - Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); - return VTBits-Tmp; + // If we are looking at the loaded value of the SDNode. + if (Op.getResNo() == 0) { + // Handle LOADX separately here. EXTLOAD case will fallthrough. 
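The ComputeMaskedBits and ComputeNumSignBits hunks in this area restrict the load special cases to result 0 of the node, i.e. the loaded value itself; result 1 of a load is the chain and carries no value bits. The facts being propagated are simple: a zero-extending load of MemBits leaves every higher bit zero, and a sign-extending load guarantees VTBits - MemBits + 1 copies of the sign bit. A standalone sketch of those two formulas, with uint64_t standing in for APInt (both function names are illustrative):

    #include <cassert>
    #include <cstdint>

    // Bits above MemBits are known zero after a zero-extending load.
    uint64_t knownZeroMaskForZextLoad(unsigned VTBits, unsigned MemBits) {
      assert(MemBits <= VTBits && VTBits <= 64);
      if (VTBits == MemBits) return 0;
      uint64_t ValueMask = VTBits == 64 ? ~0ULL : ((1ULL << VTBits) - 1);
      return ValueMask & ~((1ULL << MemBits) - 1);
    }

    // Sign bit plus its replicated copies after a sign-extending load.
    unsigned signBitsForSextLoad(unsigned VTBits, unsigned MemBits) {
      assert(MemBits <= VTBits);
      return VTBits - MemBits + 1;
    }

    int main() {
      // i8 loaded zero-extended into i32: bits 8..31 are known zero.
      assert(knownZeroMaskForZextLoad(32, 8) == 0xFFFFFF00ULL);
      // i16 loaded sign-extended into i32: 17 bits are known sign copies.
      assert(signBitsForSextLoad(32, 16) == 17);
    }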
+ if (LoadSDNode *LD = dyn_cast(Op)) { + unsigned ExtType = LD->getExtensionType(); + switch (ExtType) { + default: break; + case ISD::SEXTLOAD: // '17' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp+1; + case ISD::ZEXTLOAD: // '16' bits known + Tmp = LD->getMemoryVT().getScalarType().getSizeInBits(); + return VTBits-Tmp; + } } } @@ -2438,7 +2435,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return getConstant(Val.zextOrTrunc(VT.getSizeInBits()), VT); case ISD::UINT_TO_FP: case ISD::SINT_TO_FP: { - APFloat apf(APInt::getNullValue(VT.getSizeInBits())); + APFloat apf(EVTToAPFloatSemantics(VT), + APInt::getNullValue(VT.getSizeInBits())); (void)apf.convertFromAPInt(Val, Opcode==ISD::SINT_TO_FP, APFloat::rmNearestTiesToEven); @@ -2446,9 +2444,9 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, } case ISD::BITCAST: if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEsingle, Val), VT); else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) - return getConstantFP(APFloat(Val), VT); + return getConstantFP(APFloat(APFloat::IEEEdouble, Val), VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), VT); @@ -2495,7 +2493,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -2686,44 +2684,117 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, return SDValue(N, 0); } -SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, - EVT VT, - ConstantSDNode *Cst1, - ConstantSDNode *Cst2) { - const APInt &C1 = Cst1->getAPIntValue(), &C2 = Cst2->getAPIntValue(); +SDValue SelectionDAG::FoldConstantArithmetic(unsigned Opcode, EVT VT, + SDNode *Cst1, SDNode *Cst2) { + SmallVector, 4> Inputs; + SmallVector Outputs; + EVT SVT = VT.getScalarType(); - switch (Opcode) { - case ISD::ADD: return getConstant(C1 + C2, VT); - case ISD::SUB: return getConstant(C1 - C2, VT); - case ISD::MUL: return getConstant(C1 * C2, VT); - case ISD::UDIV: - if (C2.getBoolValue()) return getConstant(C1.udiv(C2), VT); - break; - case ISD::UREM: - if (C2.getBoolValue()) return getConstant(C1.urem(C2), VT); - break; - case ISD::SDIV: - if (C2.getBoolValue()) return getConstant(C1.sdiv(C2), VT); - break; - case ISD::SREM: - if (C2.getBoolValue()) return getConstant(C1.srem(C2), VT); - break; - case ISD::AND: return getConstant(C1 & C2, VT); - case ISD::OR: return getConstant(C1 | C2, VT); - case ISD::XOR: return getConstant(C1 ^ C2, VT); - case ISD::SHL: return getConstant(C1 << C2, VT); - case ISD::SRL: return getConstant(C1.lshr(C2), VT); - case ISD::SRA: return getConstant(C1.ashr(C2), VT); - case ISD::ROTL: return getConstant(C1.rotl(C2), VT); - case ISD::ROTR: return getConstant(C1.rotr(C2), VT); - default: break; + ConstantSDNode *Scalar1 = dyn_cast(Cst1); + ConstantSDNode *Scalar2 = dyn_cast(Cst2); + if (Scalar1 && Scalar2) { + // Scalar instruction. + Inputs.push_back(std::make_pair(Scalar1, Scalar2)); + } else { + // For vectors extract each constant element into Inputs so we can constant + // fold them individually. 
+ BuildVectorSDNode *BV1 = dyn_cast(Cst1); + BuildVectorSDNode *BV2 = dyn_cast(Cst2); + if (!BV1 || !BV2) + return SDValue(); + + assert(BV1->getNumOperands() == BV2->getNumOperands() && "Out of sync!"); + + for (unsigned I = 0, E = BV1->getNumOperands(); I != E; ++I) { + ConstantSDNode *V1 = dyn_cast(BV1->getOperand(I)); + ConstantSDNode *V2 = dyn_cast(BV2->getOperand(I)); + if (!V1 || !V2) // Not a constant, bail. + return SDValue(); + + // Avoid BUILD_VECTOR nodes that perform implicit truncation. + // FIXME: This is valid and could be handled by truncating the APInts. + if (V1->getValueType(0) != SVT || V2->getValueType(0) != SVT) + return SDValue(); + + Inputs.push_back(std::make_pair(V1, V2)); + } } - return SDValue(); + // We have a number of constant values, constant fold them element by element. + for (unsigned I = 0, E = Inputs.size(); I != E; ++I) { + const APInt &C1 = Inputs[I].first->getAPIntValue(); + const APInt &C2 = Inputs[I].second->getAPIntValue(); + + switch (Opcode) { + case ISD::ADD: + Outputs.push_back(getConstant(C1 + C2, SVT)); + break; + case ISD::SUB: + Outputs.push_back(getConstant(C1 - C2, SVT)); + break; + case ISD::MUL: + Outputs.push_back(getConstant(C1 * C2, SVT)); + break; + case ISD::UDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.udiv(C2), SVT)); + break; + case ISD::UREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.urem(C2), SVT)); + break; + case ISD::SDIV: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.sdiv(C2), SVT)); + break; + case ISD::SREM: + if (!C2.getBoolValue()) + return SDValue(); + Outputs.push_back(getConstant(C1.srem(C2), SVT)); + break; + case ISD::AND: + Outputs.push_back(getConstant(C1 & C2, SVT)); + break; + case ISD::OR: + Outputs.push_back(getConstant(C1 | C2, SVT)); + break; + case ISD::XOR: + Outputs.push_back(getConstant(C1 ^ C2, SVT)); + break; + case ISD::SHL: + Outputs.push_back(getConstant(C1 << C2, SVT)); + break; + case ISD::SRL: + Outputs.push_back(getConstant(C1.lshr(C2), SVT)); + break; + case ISD::SRA: + Outputs.push_back(getConstant(C1.ashr(C2), SVT)); + break; + case ISD::ROTL: + Outputs.push_back(getConstant(C1.rotl(C2), SVT)); + break; + case ISD::ROTR: + Outputs.push_back(getConstant(C1.rotr(C2), SVT)); + break; + default: + return SDValue(); + } + } + + // Handle the scalar case first. + if (Outputs.size() == 1) + return Outputs.back(); + + // Otherwise build a big vector out of the scalar elements we generated. + return getNode(ISD::BUILD_VECTOR, DebugLoc(), VT, Outputs.data(), + Outputs.size()); } -SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, - SDValue N1, SDValue N2) { +SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, SDValue N1, + SDValue N2) { ConstantSDNode *N1C = dyn_cast(N1.getNode()); ConstantSDNode *N2C = dyn_cast(N2.getNode()); switch (Opcode) { @@ -2845,6 +2916,8 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, "Shift operators return type must be the same as their first arg"); assert(VT.isInteger() && N2.getValueType().isInteger() && "Shifts only work on integers"); + assert((!VT.isVector() || VT == N2.getValueType()) && + "Vector shift amounts must be in the same as their first arg"); // Verify that the shift amount VT is bit enough to hold valid shift // amounts. 
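The rewritten FoldConstantArithmetic accepts BUILD_VECTOR operands as well as scalars: it pairs up the constant elements, folds each lane with the scalar rules, bails out entirely if any lane cannot be folded (division by zero, a non-constant element, or an implicitly truncating BUILD_VECTOR), and rebuilds a vector from the per-lane results. A small standalone sketch of that all-or-nothing element-wise fold, with uint64_t in place of APInt and std::optional signalling the bail-out (foldVectors and the UDiv lambda are illustrative names, not patch code):

    #include <cassert>
    #include <cstdint>
    #include <functional>
    #include <optional>
    #include <vector>

    // Fold two constant "vectors" lane by lane; any lane that cannot be
    // folded aborts the whole operation, mirroring the patched behaviour.
    std::optional<std::vector<uint64_t>>
    foldVectors(const std::vector<uint64_t> &A, const std::vector<uint64_t> &B,
                const std::function<std::optional<uint64_t>(uint64_t, uint64_t)> &Op) {
      if (A.size() != B.size())
        return std::nullopt;                   // operands out of sync, bail
      std::vector<uint64_t> Out;
      Out.reserve(A.size());
      for (size_t I = 0; I != A.size(); ++I) {
        std::optional<uint64_t> R = Op(A[I], B[I]);
        if (!R)
          return std::nullopt;                 // one bad lane kills the fold
        Out.push_back(*R);
      }
      return Out;
    }

    int main() {
      auto UDiv = [](uint64_t C1, uint64_t C2) -> std::optional<uint64_t> {
        if (C2 == 0) return std::nullopt;      // mirrors "if (!C2.getBoolValue()) return SDValue()"
        return C1 / C2;
      };
      auto R = foldVectors({8, 9, 10}, {2, 3, 5}, UDiv);
      assert(R && (*R)[0] == 4 && (*R)[1] == 3 && (*R)[2] == 2);
      assert(!foldVectors({8}, {0}, UDiv));    // division by zero bails out
    }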
This catches things like trying to shift an i1024 value by an // i8, which is easy to fall into in generic code that uses @@ -3019,16 +3092,14 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } } - if (N1C) { - if (N2C) { - SDValue SV = FoldConstantArithmetic(Opcode, VT, N1C, N2C); - if (SV.getNode()) return SV; - } else { // Cannonicalize constant to RHS if commutative - if (isCommutativeBinOp(Opcode)) { - std::swap(N1C, N2C); - std::swap(N1, N2); - } - } + // Perform trivial constant folding. + SDValue SV = FoldConstantArithmetic(Opcode, VT, N1.getNode(), N2.getNode()); + if (SV.getNode()) return SV; + + // Canonicalize constant to RHS if commutative. + if (N1C && !N2C && isCommutativeBinOp(Opcode)) { + std::swap(N1C, N2C); + std::swap(N1, N2); } // Constant fold FP operations. @@ -3036,7 +3107,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, ConstantFPSDNode *N2CFP = dyn_cast(N2.getNode()); if (N1CFP) { if (!N2CFP && isCommutativeBinOp(Opcode)) { - // Cannonicalize constant to RHS if commutative + // Canonicalize constant to RHS if commutative. std::swap(N1CFP, N2CFP); std::swap(N1, N2); } else if (N2CFP) { @@ -3080,7 +3151,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, bool ignored; // This can return overflow, underflow, or inexact; we don't care. // FIXME need to be more flexible about rounding mode. - (void)V.convert(*EVTToAPFloatSemantics(VT), + (void)V.convert(EVTToAPFloatSemantics(VT), APFloat::rmNearestTiesToEven, &ignored); return getConstantFP(V, VT); } @@ -3312,17 +3383,6 @@ SDValue SelectionDAG::getStackArgumentTokenFactor(SDValue Chain) { &ArgChains[0], ArgChains.size()); } -/// SplatByte - Distribute ByteVal over NumBits bits. -static APInt SplatByte(unsigned NumBits, uint8_t ByteVal) { - APInt Val = APInt(NumBits, ByteVal); - unsigned Shift = 8; - for (unsigned i = NumBits; i > 8; i >>= 1) { - Val = (Val << Shift) | Val; - Shift <<= 1; - } - return Val; -} - /// getMemsetValue - Vectorized representation of the memset value /// operand. static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, @@ -3331,17 +3391,18 @@ static SDValue getMemsetValue(SDValue Value, EVT VT, SelectionDAG &DAG, unsigned NumBits = VT.getScalarType().getSizeInBits(); if (ConstantSDNode *C = dyn_cast(Value)) { - APInt Val = SplatByte(NumBits, C->getZExtValue() & 255); + assert(C->getAPIntValue().getBitWidth() == 8); + APInt Val = APInt::getSplat(NumBits, C->getAPIntValue()); if (VT.isInteger()) return DAG.getConstant(Val, VT); - return DAG.getConstantFP(APFloat(Val), VT); + return DAG.getConstantFP(APFloat(DAG.EVTToAPFloatSemantics(VT), Val), VT); } Value = DAG.getNode(ISD::ZERO_EXTEND, dl, VT, Value); if (NumBits > 8) { // Use a multiplication with 0x010101... to extend the input to the // required length. 
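SplatByte is dropped in favour of APInt::getSplat, which generalizes the same trick: replicate a small pattern across a wider value by repeatedly OR-ing the value with a copy of itself shifted by its current width. The memset lowering relies on this twice, once to materialize a constant fill value and once to build the 0x01010101... multiplier that smears a zero-extended byte across the full width. A standalone sketch limited to 64-bit results (splatByte here is illustrative; it is neither the removed helper nor APInt::getSplat):

    #include <cassert>
    #include <cstdint>

    // Replicate an 8-bit pattern across NumBits by repeated doubling.
    uint64_t splatByte(unsigned NumBits, uint8_t ByteVal) {
      assert(NumBits >= 8 && NumBits <= 64 && (NumBits & (NumBits - 1)) == 0);
      uint64_t Val = ByteVal;
      for (unsigned Shift = 8; Shift < NumBits; Shift <<= 1)
        Val |= Val << Shift;                   // double the replicated width
      uint64_t Mask = NumBits == 64 ? ~0ULL : ((1ULL << NumBits) - 1);
      return Val & Mask;
    }

    int main() {
      assert(splatByte(32, 0xAB) == 0xABABABABULL);
      // The 0x01 splat is the multiplier used to widen a zero-extended byte,
      // e.g. value * 0x01010101 for an i32 memset store.
      assert(splatByte(32, 0x01) == 0x01010101ULL);
    }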
- APInt Magic = SplatByte(NumBits, 0x01); + APInt Magic = APInt::getSplat(NumBits, APInt(8, 0x01)); Value = DAG.getNode(ISD::MUL, dl, VT, Value, DAG.getConstant(Magic, VT)); } @@ -3370,10 +3431,11 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, } assert(!VT.isVector() && "Can't handle vector type here!"); - unsigned NumVTBytes = VT.getSizeInBits() / 8; + unsigned NumVTBits = VT.getSizeInBits(); + unsigned NumVTBytes = NumVTBits / 8; unsigned NumBytes = std::min(NumVTBytes, unsigned(Str.size())); - uint64_t Val = 0; + APInt Val(NumVTBits, 0); if (TLI.isLittleEndian()) { for (unsigned i = 0; i != NumBytes; ++i) Val |= (uint64_t)(unsigned char)Str[i] << i*8; @@ -3382,7 +3444,12 @@ static SDValue getMemsetStringVal(EVT VT, DebugLoc dl, SelectionDAG &DAG, Val |= (uint64_t)(unsigned char)Str[i] << (NumVTBytes-i-1)*8; } - return DAG.getConstant(Val, VT); + // If the "cost" of materializing the integer immediate is 1 or free, then + // it is cost effective to turn the load into the immediate. + const TargetTransformInfo *TTI = DAG.getTargetTransformInfo(); + if (TTI->getIntImmCost(Val, VT.getTypeForEVT(*DAG.getContext())) < 2) + return DAG.getConstant(Val, VT); + return SDValue(0, 0); } /// getMemBasePlusOffset - Returns base and offset node for the @@ -3420,8 +3487,10 @@ static bool isMemSrcFromString(SDValue Src, StringRef &Str) { static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned Limit, uint64_t Size, unsigned DstAlign, unsigned SrcAlign, - bool IsZeroVal, + bool IsMemset, + bool ZeroMemset, bool MemcpyStrSrc, + bool AllowOverlap, SelectionDAG &DAG, const TargetLowering &TLI) { assert((SrcAlign == 0 || SrcAlign >= DstAlign) && @@ -3434,7 +3503,7 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, // 'MemcpyStrSrc' indicates whether the memcpy source is constant so it does // not need to be loaded. EVT VT = TLI.getOptimalMemOpType(Size, DstAlign, SrcAlign, - IsZeroVal, MemcpyStrSrc, + IsMemset, ZeroMemset, MemcpyStrSrc, DAG.getMachineFunction()); if (VT == MVT::Other) { @@ -3464,21 +3533,51 @@ static bool FindOptimalMemOpLowering(std::vector &MemOps, unsigned VTSize = VT.getSizeInBits() / 8; while (VTSize > Size) { // For now, only use non-vector load / store's for the left-over pieces. + EVT NewVT = VT; + unsigned NewVTSize; + + bool Found = false; if (VT.isVector() || VT.isFloatingPoint()) { - VT = MVT::i64; - while (!TLI.isTypeLegal(VT)) - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - VTSize = VT.getSizeInBits() / 8; - } else { - // This can result in a type that is not legal on the target, e.g. - // 1 or 2 bytes on PPC. - VT = (MVT::SimpleValueType)(VT.getSimpleVT().SimpleTy - 1); - VTSize >>= 1; + NewVT = (VT.getSizeInBits() > 64) ? MVT::i64 : MVT::i32; + if (TLI.isOperationLegalOrCustom(ISD::STORE, NewVT) && + TLI.isSafeMemOpType(NewVT.getSimpleVT())) + Found = true; + else if (NewVT == MVT::i64 && + TLI.isOperationLegalOrCustom(ISD::STORE, MVT::f64) && + TLI.isSafeMemOpType(MVT::f64)) { + // i64 is usually not legal on 32-bit targets, but f64 may be. + NewVT = MVT::f64; + Found = true; + } + } + + if (!Found) { + do { + NewVT = (MVT::SimpleValueType)(NewVT.getSimpleVT().SimpleTy - 1); + if (NewVT == MVT::i8) + break; + } while (!TLI.isSafeMemOpType(NewVT.getSimpleVT())); + } + NewVTSize = NewVT.getSizeInBits() / 8; + + // If the new VT cannot cover all of the remaining bits, then consider + // issuing a (or a pair of) unaligned and overlapping load / store. 
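getMemsetStringVal now builds the immediate as an APInt of the exact value width and only uses it when TargetTransformInfo reports the immediate as cheap to materialize (cost below 2); otherwise it returns a null SDValue and the caller falls back to a load. The byte-packing part is ordinary endian-aware shifting, sketched standalone below (packStringBytes and the fixed 8-byte limit are illustrative, and the cost query itself is not modelled):

    #include <cassert>
    #include <cstdint>
    #include <string>

    // Pack the leading bytes of a constant string into one integer so a
    // short memcpy-from-string can become a single store of an immediate.
    uint64_t packStringBytes(const std::string &Str, unsigned NumBytes,
                             bool LittleEndian) {
      assert(NumBytes <= 8 && NumBytes <= Str.size());
      uint64_t Val = 0;
      for (unsigned i = 0; i != NumBytes; ++i) {
        uint64_t Byte = static_cast<unsigned char>(Str[i]);
        unsigned Shift = LittleEndian ? i * 8 : (NumBytes - i - 1) * 8;
        Val |= Byte << Shift;
      }
      return Val;
    }

    int main() {
      // "abcd" as a 4-byte immediate, both byte orders.
      assert(packStringBytes("abcd", 4, /*LittleEndian=*/true)  == 0x64636261ULL);
      assert(packStringBytes("abcd", 4, /*LittleEndian=*/false) == 0x61626364ULL);
    }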
+ // FIXME: Only does this for 64-bit or more since we don't have proper + // cost model for unaligned load / store. + bool Fast; + if (NumMemOps && AllowOverlap && + VTSize >= 8 && NewVTSize < Size && + TLI.allowsUnalignedMemoryAccesses(VT, &Fast) && Fast) + VTSize = Size; + else { + VT = NewVT; + VTSize = NewVTSize; } } if (++NumMemOps > Limit) return false; + MemOps.push_back(VT); Size -= VTSize; } @@ -3507,8 +3606,8 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); bool OptSize = - MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize); + MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3523,12 +3622,21 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, if (!FindOptimalMemOpLowering(MemOps, Limit, Size, (DstAlignCanChange ? 0 : Align), (isZeroStr ? 0 : SrcAlign), - true, CopyFromStr, DAG, TLI)) + false, false, CopyFromStr, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { Type *Ty = MemOps[0].getTypeForEVT(*DAG.getContext()); unsigned NewAlign = (unsigned) TLI.getDataLayout()->getABITypeAlignment(Ty); + + // Don't promote to an alignment that would require dynamic stack + // realignment. + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + if (!TRI->needsStackRealignment(MF)) + while (NewAlign > Align && + TLI.getDataLayout()->exceedsNaturalStackAlignment(NewAlign)) + NewAlign /= 2; + if (NewAlign > Align) { // Give the stack frame object a larger alignment if needed. if (MFI->getObjectAlignment(FI->getIndex()) < NewAlign) @@ -3545,6 +3653,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned VTSize = VT.getSizeInBits() / 8; SDValue Value, Store; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + SrcOff -= VTSize - Size; + DstOff -= VTSize - Size; + } + if (CopyFromStr && (isZeroStr || (VT.isInteger() && !VT.isVector()))) { // It's unlikely a store of a vector immediate can be done in a single @@ -3553,11 +3669,14 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, // FIXME: Handle other cases where store of vector immediate is done in // a single instruction. Value = getMemsetStringVal(VT, dl, DAG, TLI, Str.substr(SrcOff)); - Store = DAG.getStore(Chain, dl, Value, - getMemBasePlusOffset(Dst, DstOff, DAG), - DstPtrInfo.getWithOffset(DstOff), isVol, - false, Align); - } else { + if (Value.getNode()) + Store = DAG.getStore(Chain, dl, Value, + getMemBasePlusOffset(Dst, DstOff, DAG), + DstPtrInfo.getWithOffset(DstOff), isVol, + false, Align); + } + + if (!Store.getNode()) { // The type might not be legal for the target. This should only happen // if the type is smaller than a legal type, as on PPC, so the right // thing to do is generate a LoadExt/StoreTrunc pair. 
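The FindOptimalMemOpLowering change lets the final operation of a memcpy/memset overrun the remaining size when unaligned accesses are fast: instead of tailing off with progressively narrower stores, it keeps the wide type, and the emission loops above compensate by pulling SrcOff/DstOff back by VTSize - Size so the last operation overlaps the previous one and ends exactly at the buffer end. A standalone sketch of that planning step on plain byte counts (Chunk, planOps and the fixed 8-byte width are illustrative; the real code also checks allowsUnalignedMemoryAccesses and a 64-bit size threshold):

    #include <cassert>
    #include <cstdint>
    #include <vector>

    struct Chunk { uint64_t Offset; unsigned Bytes; };

    // Cover Size bytes with fixed-width operations. If the tail does not fill
    // a whole operation, emit one more full-width op that overlaps the
    // previous one instead of a chain of narrower ops.
    std::vector<Chunk> planOps(uint64_t Size, unsigned OpBytes) {
      std::vector<Chunk> Ops;
      uint64_t Off = 0;
      while (Size > 0) {
        if (Size < OpBytes && !Ops.empty()) {
          // Back the offset up so the final op ends exactly at the end.
          Ops.push_back({Off - (OpBytes - Size), OpBytes});
          break;
        }
        unsigned Bytes = Size < OpBytes ? static_cast<unsigned>(Size) : OpBytes;
        Ops.push_back({Off, Bytes});
        Off += Bytes;
        Size -= Bytes;
      }
      return Ops;
    }

    int main() {
      // 15 bytes with 8-byte ops: [0,8) plus an overlapping [7,15).
      auto Ops = planOps(15, 8);
      assert(Ops.size() == 2);
      assert(Ops[0].Offset == 0 && Ops[1].Offset == 7);
    }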
These simplify @@ -3577,6 +3696,7 @@ static SDValue getMemcpyLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, OutChains.push_back(Store); SrcOff += VTSize; DstOff += VTSize; + Size -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3601,8 +3721,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize); + bool OptSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3612,8 +3732,8 @@ static SDValue getMemmoveLoadsAndStores(SelectionDAG &DAG, DebugLoc dl, unsigned Limit = AlwaysInline ? ~0U : TLI.getMaxStoresPerMemmove(OptSize); if (!FindOptimalMemOpLowering(MemOps, Limit, Size, - (DstAlignCanChange ? 0 : Align), - SrcAlign, true, false, DAG, TLI)) + (DstAlignCanChange ? 0 : Align), SrcAlign, + false, false, false, false, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3680,8 +3800,8 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, bool DstAlignCanChange = false; MachineFunction &MF = DAG.getMachineFunction(); MachineFrameInfo *MFI = MF.getFrameInfo(); - bool OptSize = MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize); + bool OptSize = MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize); FrameIndexSDNode *FI = dyn_cast(Dst); if (FI && !MFI->isFixedObjectIndex(FI->getIndex())) DstAlignCanChange = true; @@ -3689,7 +3809,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isa(Src) && cast(Src)->isNullValue(); if (!FindOptimalMemOpLowering(MemOps, TLI.getMaxStoresPerMemset(OptSize), Size, (DstAlignCanChange ? 0 : Align), 0, - IsZeroVal, false, DAG, TLI)) + true, IsZeroVal, false, true, DAG, TLI)) return SDValue(); if (DstAlignCanChange) { @@ -3716,6 +3836,13 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, for (unsigned i = 0; i < NumMemOps; i++) { EVT VT = MemOps[i]; + unsigned VTSize = VT.getSizeInBits() / 8; + if (VTSize > Size) { + // Issuing an unaligned load / store pair that overlaps with the previous + // pair. Adjust the offset accordingly. + assert(i == NumMemOps-1 && i != 0); + DstOff -= VTSize - Size; + } // If this store is smaller than the largest store see whether we can get // the smaller value for free with a truncate. @@ -3734,6 +3861,7 @@ static SDValue getMemsetStores(SelectionDAG &DAG, DebugLoc dl, isVol, false, Align); OutChains.push_back(Store); DstOff += VT.getSizeInBits() / 8; + Size -= VTSize; } return DAG.getNode(ISD::TokenFactor, dl, MVT::Other, @@ -3745,6 +3873,7 @@ SDValue SelectionDAG::getMemcpy(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memcpy to loads and stores first. // For cases within the target-specified limits, this is the best choice. 
@@ -3812,6 +3941,7 @@ SDValue SelectionDAG::getMemmove(SDValue Chain, DebugLoc dl, SDValue Dst, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memmove to loads and stores first. // For cases within the target-specified limits, this is the best choice. @@ -3866,6 +3996,7 @@ SDValue SelectionDAG::getMemset(SDValue Chain, DebugLoc dl, SDValue Dst, SDValue Src, SDValue Size, unsigned Align, bool isVol, MachinePointerInfo DstPtrInfo) { + assert(Align && "The SDAG layer expects explicit alignment and reserves 0"); // Check to see if we should lower the memset to stores first. // For cases within the target-specified limits, this is the best choice. @@ -4577,7 +4708,7 @@ SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, EVT VT, } SDValue SelectionDAG::getNode(unsigned Opcode, DebugLoc DL, - const std::vector &ResultTys, + ArrayRef ResultTys, const SDValue *Ops, unsigned NumOps) { return getNode(Opcode, DL, getVTList(&ResultTys[0], ResultTys.size()), Ops, NumOps); @@ -5229,7 +5360,7 @@ SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, EVT VT1, MachineSDNode * SelectionDAG::getMachineNode(unsigned Opcode, DebugLoc dl, - const std::vector &ResultTys, + ArrayRef ResultTys, const SDValue *Ops, unsigned NumOps) { SDVTList VTs = getVTList(&ResultTys[0], ResultTys.size()); return getMachineNode(Opcode, dl, VTs, Ops, NumOps); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 3fbf7c2fe66b..ce40cd6a0c9c 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -12,51 +12,51 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" -#include "SDNodeDbgValue.h" #include "SelectionDAGBuilder.h" +#include "SDNodeDbgValue.h" #include "llvm/ADT/BitVector.h" -#include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/ConstantFolding.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/Constants.h" -#include "llvm/CallingConv.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" #include "llvm/CodeGen/Analysis.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" -#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/DataLayout.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" 
+#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/IntegersSubsetMapping.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetOptions.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/IntegersSubsetMapping.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/MathExtras.h" -#include "llvm/Support/raw_ostream.h" #include using namespace llvm; @@ -89,7 +89,7 @@ static const unsigned MaxParallelChains = 64; static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT, const Value *V); + MVT PartVT, EVT ValueVT, const Value *V); /// getCopyFromParts - Create a value that contains the specified legal parts /// combined into the value they represent. If the parts combine to a type @@ -98,7 +98,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, /// (ISD::AssertSext). static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, - unsigned NumParts, EVT PartVT, EVT ValueVT, + unsigned NumParts, MVT PartVT, EVT ValueVT, const Value *V, ISD::NodeType AssertOp = ISD::DELETED_NODE) { if (ValueVT.isVector()) @@ -161,7 +161,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, } } else if (PartVT.isFloatingPoint()) { // FP split into multiple FP parts (for ppcf128) - assert(ValueVT == EVT(MVT::ppcf128) && PartVT == EVT(MVT::f64) && + assert(ValueVT == EVT(MVT::ppcf128) && PartVT == MVT::f64 && "Unexpected split"); SDValue Lo, Hi; Lo = DAG.getNode(ISD::BITCAST, DL, EVT(MVT::f64), Parts[0]); @@ -179,25 +179,25 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, } // There is now one part, held in Val. Correct it to match ValueVT. - PartVT = Val.getValueType(); + EVT PartEVT = Val.getValueType(); - if (PartVT == ValueVT) + if (PartEVT == ValueVT) return Val; - if (PartVT.isInteger() && ValueVT.isInteger()) { - if (ValueVT.bitsLT(PartVT)) { + if (PartEVT.isInteger() && ValueVT.isInteger()) { + if (ValueVT.bitsLT(PartEVT)) { // For a truncate, see if we have any information to // indicate whether the truncated bits will always be // zero or sign-extension. if (AssertOp != ISD::DELETED_NODE) - Val = DAG.getNode(AssertOp, DL, PartVT, Val, + Val = DAG.getNode(AssertOp, DL, PartEVT, Val, DAG.getValueType(ValueVT)); return DAG.getNode(ISD::TRUNCATE, DL, ValueVT, Val); } return DAG.getNode(ISD::ANY_EXTEND, DL, ValueVT, Val); } - if (PartVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { + if (PartEVT.isFloatingPoint() && ValueVT.isFloatingPoint()) { // FP_ROUND's are always exact here. 
if (ValueVT.bitsLT(Val.getValueType())) return DAG.getNode(ISD::FP_ROUND, DL, ValueVT, Val, @@ -206,7 +206,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, return DAG.getNode(ISD::FP_EXTEND, DL, ValueVT, Val); } - if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); llvm_unreachable("Unknown mismatch!"); @@ -219,7 +219,7 @@ static SDValue getCopyFromParts(SelectionDAG &DAG, DebugLoc DL, /// ValueVT (ISD::AssertSext). static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, const SDValue *Parts, unsigned NumParts, - EVT PartVT, EVT ValueVT, const Value *V) { + MVT PartVT, EVT ValueVT, const Value *V) { assert(ValueVT.isVector() && "Not a vector value"); assert(NumParts > 0 && "No parts to assemble!"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); @@ -227,7 +227,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // Handle a multi-element vector. if (NumParts > 1) { - EVT IntermediateVT, RegisterVT; + EVT IntermediateVT; + MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -235,7 +236,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, assert(NumRegs == NumParts && "Part count doesn't match vector breakdown!"); NumParts = NumRegs; // Silence a compiler warning. assert(RegisterVT == PartVT && "Part type doesn't match vector breakdown!"); - assert(RegisterVT == Parts[0].getValueType() && + assert(RegisterVT == Parts[0].getSimpleValueType() && "Part type doesn't match part!"); // Assemble the parts into intermediate operands. @@ -265,31 +266,31 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } // There is now one part, held in Val. Correct it to match ValueVT. - PartVT = Val.getValueType(); + EVT PartEVT = Val.getValueType(); - if (PartVT == ValueVT) + if (PartEVT == ValueVT) return Val; - if (PartVT.isVector()) { + if (PartEVT.isVector()) { // If the element type of the source/dest vectors are the same, but the // parts vector has more elements than the value vector, then we have a // vector widening case (e.g. <2 x float> -> <4 x float>). Extract the // elements we want. - if (PartVT.getVectorElementType() == ValueVT.getVectorElementType()) { - assert(PartVT.getVectorNumElements() > ValueVT.getVectorNumElements() && + if (PartEVT.getVectorElementType() == ValueVT.getVectorElementType()) { + assert(PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements() && "Cannot narrow, it would be a lossy transformation"); return DAG.getNode(ISD::EXTRACT_SUBVECTOR, DL, ValueVT, Val, DAG.getIntPtrConstant(0)); } // Vector/Vector bitcast. - if (ValueVT.getSizeInBits() == PartVT.getSizeInBits()) + if (ValueVT.getSizeInBits() == PartEVT.getSizeInBits()) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); - assert(PartVT.getVectorNumElements() == ValueVT.getVectorNumElements() && + assert(PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements() && "Cannot handle this kind of promotion"); // Promoted vector extract - bool Smaller = ValueVT.bitsLE(PartVT); + bool Smaller = ValueVT.bitsLE(PartEVT); return DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), DL, ValueVT, Val); @@ -297,7 +298,7 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, // Trivial bitcast if the types are the same size and the destination // vector type is legal. 
- if (PartVT.getSizeInBits() == ValueVT.getSizeInBits() && + if (PartEVT.getSizeInBits() == ValueVT.getSizeInBits() && TLI.isTypeLegal(ValueVT)) return DAG.getNode(ISD::BITCAST, DL, ValueVT, Val); @@ -317,8 +318,8 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, } if (ValueVT.getVectorNumElements() == 1 && - ValueVT.getVectorElementType() != PartVT) { - bool Smaller = ValueVT.bitsLE(PartVT); + ValueVT.getVectorElementType() != PartEVT) { + bool Smaller = ValueVT.bitsLE(PartEVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), DL, ValueVT.getScalarType(), Val); } @@ -328,14 +329,14 @@ static SDValue getCopyFromPartsVector(SelectionDAG &DAG, DebugLoc DL, static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc dl, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, const Value *V); + MVT PartVT, const Value *V); /// getCopyToParts - Create a series of nodes that contain the specified value /// split into legal parts. If the parts contain more bits than Val, then, for /// integers, ExtendKind can be used to specify how to generate the extra bits. static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, const Value *V, + MVT PartVT, const Value *V, ISD::NodeType ExtendKind = ISD::ANY_EXTEND) { EVT ValueVT = Val.getValueType(); @@ -352,7 +353,8 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, return; assert(!ValueVT.isVector() && "Vector case handled elsewhere"); - if (PartVT == ValueVT) { + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { assert(NumParts == 1 && "No-op copy with multiple parts!"); Parts[0] = Val; return; @@ -374,7 +376,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, } } else if (PartBits == ValueVT.getSizeInBits()) { // Different types of the same size. - assert(NumParts == 1 && PartVT != ValueVT); + assert(NumParts == 1 && PartEVT != ValueVT); Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (NumParts * PartBits < ValueVT.getSizeInBits()) { // If the parts cover less bits than value has, truncate the value. @@ -393,7 +395,7 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, "Failed to tile the value with PartVT!"); if (NumParts == 1) { - if (PartVT != ValueVT) { + if (PartEVT != ValueVT) { LLVMContext &Ctx = *DAG.getContext(); Twine ErrMsg("scalar-to-vector conversion failed"); if (const Instruction *I = dyn_cast_or_null(V)) { @@ -466,20 +468,21 @@ static void getCopyToParts(SelectionDAG &DAG, DebugLoc DL, /// value split into legal parts. static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, SDValue Val, SDValue *Parts, unsigned NumParts, - EVT PartVT, const Value *V) { + MVT PartVT, const Value *V) { EVT ValueVT = Val.getValueType(); assert(ValueVT.isVector() && "Not a vector"); const TargetLowering &TLI = DAG.getTargetLoweringInfo(); if (NumParts == 1) { - if (PartVT == ValueVT) { + EVT PartEVT = PartVT; + if (PartEVT == ValueVT) { // Nothing to do. } else if (PartVT.getSizeInBits() == ValueVT.getSizeInBits()) { // Bitconvert vector->vector case. Val = DAG.getNode(ISD::BITCAST, DL, PartVT, Val); } else if (PartVT.isVector() && - PartVT.getVectorElementType() == ValueVT.getVectorElementType() && - PartVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { + PartEVT.getVectorElementType() == ValueVT.getVectorElementType() && + PartEVT.getVectorNumElements() > ValueVT.getVectorNumElements()) { EVT ElementVT = PartVT.getVectorElementType(); // Vector widening case, e.g. 
<2 x float> -> <4 x float>. Shuffle in // undef elements. @@ -499,12 +502,12 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, //SDValue UndefElts = DAG.getUNDEF(VectorTy); //Val = DAG.getNode(ISD::CONCAT_VECTORS, DL, PartVT, Val, UndefElts); } else if (PartVT.isVector() && - PartVT.getVectorElementType().bitsGE( + PartEVT.getVectorElementType().bitsGE( ValueVT.getVectorElementType()) && - PartVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { + PartEVT.getVectorNumElements() == ValueVT.getVectorNumElements()) { // Promoted vector extract - bool Smaller = PartVT.bitsLE(ValueVT); + bool Smaller = PartEVT.bitsLE(ValueVT); Val = DAG.getNode((Smaller ? ISD::TRUNCATE : ISD::ANY_EXTEND), DL, PartVT, Val); } else{ @@ -524,7 +527,8 @@ static void getCopyToPartsVector(SelectionDAG &DAG, DebugLoc DL, } // Handle a multi-element vector. - EVT IntermediateVT, RegisterVT; + EVT IntermediateVT; + MVT RegisterVT; unsigned NumIntermediates; unsigned NumRegs = TLI.getVectorTypeBreakdown(*DAG.getContext(), ValueVT, IntermediateVT, @@ -589,7 +593,7 @@ namespace { /// getRegisterType member function, however when with physical registers /// it is necessary to have a separate record of the types. /// - SmallVector RegVTs; + SmallVector RegVTs; /// Regs - This list holds the registers assigned to the values. /// Each legal or promoted value requires one register, and each @@ -600,7 +604,7 @@ namespace { RegsForValue() {} RegsForValue(const SmallVector ®s, - EVT regvt, EVT valuevt) + MVT regvt, EVT valuevt) : ValueVTs(1, valuevt), RegVTs(1, regvt), Regs(regs) {} RegsForValue(LLVMContext &Context, const TargetLowering &tli, @@ -610,7 +614,7 @@ namespace { for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = tli.getNumRegisters(Context, ValueVT); - EVT RegisterVT = tli.getRegisterType(Context, ValueVT); + MVT RegisterVT = tli.getRegisterType(Context, ValueVT); for (unsigned i = 0; i != NumRegs; ++i) Regs.push_back(Reg + i); RegVTs.push_back(RegisterVT); @@ -621,7 +625,7 @@ namespace { /// areValueTypesLegal - Return true if types of all the values are legal. bool areValueTypesLegal(const TargetLowering &TLI) { for (unsigned Value = 0, e = ValueVTs.size(); Value != e; ++Value) { - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; if (!TLI.isTypeLegal(RegisterVT)) return false; } @@ -683,7 +687,7 @@ SDValue RegsForValue::getCopyFromRegs(SelectionDAG &DAG, // Copy the legal parts from the registers. EVT ValueVT = ValueVTs[Value]; unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; Parts.resize(NumRegs); for (unsigned i = 0; i != NumRegs; ++i) { @@ -768,10 +772,12 @@ void RegsForValue::getCopyToRegs(SDValue Val, SelectionDAG &DAG, DebugLoc dl, for (unsigned Value = 0, Part = 0, e = ValueVTs.size(); Value != e; ++Value) { EVT ValueVT = ValueVTs[Value]; unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), ValueVT); - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; + ISD::NodeType ExtendKind = + TLI.isZExtFree(Val, RegisterVT)? 
ISD::ZERO_EXTEND: ISD::ANY_EXTEND; getCopyToParts(DAG, dl, Val.getValue(Val.getResNo() + Value), - &Parts[Part], NumParts, RegisterVT, V); + &Parts[Part], NumParts, RegisterVT, V, ExtendKind); Part += NumParts; } @@ -834,7 +840,7 @@ void RegsForValue::AddInlineAsmOperands(unsigned Code, bool HasMatching, for (unsigned Value = 0, Reg = 0, e = ValueVTs.size(); Value != e; ++Value) { unsigned NumRegs = TLI.getNumRegisters(*DAG.getContext(), ValueVTs[Value]); - EVT RegisterVT = RegVTs[Value]; + MVT RegisterVT = RegVTs[Value]; for (unsigned i = 0; i != NumRegs; ++i) { assert(Reg < Regs.size() && "Mismatch in # registers expected"); Ops.push_back(DAG.getRegister(Regs[Reg++], RegisterVT)); @@ -967,7 +973,7 @@ void SelectionDAGBuilder::visit(unsigned Opcode, const User &I) { // Build the switch statement using the Instruction.def file. #define HANDLE_INST(NUM, OPCODE, CLASS) \ case Instruction::OPCODE: visit##OPCODE((const CLASS&)I); break; -#include "llvm/Instruction.def" +#include "llvm/IR/Instruction.def" } // Assign the ordering to the freshly created DAG nodes. @@ -1227,16 +1233,18 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { ISD::NodeType ExtendKind = ISD::ANY_EXTEND; const Function *F = I.getParent()->getParent(); - if (F->getRetAttributes().hasAttribute(Attributes::SExt)) + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::SExt)) ExtendKind = ISD::SIGN_EXTEND; - else if (F->getRetAttributes().hasAttribute(Attributes::ZExt)) + else if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::ZExt)) ExtendKind = ISD::ZERO_EXTEND; if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) - VT = TLI.getTypeForExtArgOrReturn(*DAG.getContext(), VT, ExtendKind); + VT = TLI.getTypeForExtArgOrReturn(VT.getSimpleVT(), ExtendKind); unsigned NumParts = TLI.getNumRegisters(*DAG.getContext(), VT); - EVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); + MVT PartVT = TLI.getRegisterType(*DAG.getContext(), VT); SmallVector Parts(NumParts); getCopyToParts(DAG, getCurDebugLoc(), SDValue(RetOp.getNode(), RetOp.getResNo() + j), @@ -1244,7 +1252,8 @@ void SelectionDAGBuilder::visitRet(const ReturnInst &I) { // 'inreg' on function refers to return value ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (F->getRetAttributes().hasAttribute(Attributes::InReg)) + if (F->getAttributes().hasAttribute(AttributeSet::ReturnIndex, + Attribute::InReg)) Flags.setInReg(); // Propagate extension type if any @@ -1758,8 +1767,8 @@ void SelectionDAGBuilder::visitBitTestHeader(BitTestBlock &B, Sub = DAG.getZExtOrTrunc(Sub, getCurDebugLoc(), VT); } - B.RegVT = VT; - B.Reg = FuncInfo.CreateReg(VT); + B.RegVT = VT.getSimpleVT(); + B.Reg = FuncInfo.CreateReg(B.RegVT); SDValue CopyTo = DAG.getCopyToReg(getControlRoot(), getCurDebugLoc(), B.Reg, Sub); @@ -1793,7 +1802,7 @@ void SelectionDAGBuilder::visitBitTestCase(BitTestBlock &BB, unsigned Reg, BitTestCase &B, MachineBasicBlock *SwitchBB) { - EVT VT = BB.RegVT; + MVT VT = BB.RegVT; SDValue ShiftOp = DAG.getCopyFromReg(getControlRoot(), getCurDebugLoc(), Reg, VT); SDValue Cmp; @@ -2645,7 +2654,7 @@ void SelectionDAGBuilder::visitShift(const User &I, unsigned Opcode) { SDValue Op1 = getValue(I.getOperand(0)); SDValue Op2 = getValue(I.getOperand(1)); - MVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); + EVT ShiftTy = TLI.getShiftAmountTy(Op2.getValueType()); // Coerce the shift amount to the right type if we can. 
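Two related extension decisions appear in this region: getCopyToRegs now asks isZExtFree and prefers ISD::ZERO_EXTEND over ISD::ANY_EXTEND when widening a value into its register parts, and visitRet picks SIGN_EXTEND or ZERO_EXTEND from the signext/zeroext return attributes. For a narrow value widened into a full register the three kinds differ only in what lands in the high bits, as in this standalone sketch (widenI8 is an illustrative stand-in, not DAG code):

    #include <cassert>
    #include <cstdint>

    enum class ExtendKind { Any, Sign, Zero };

    // Sign-extension replicates the sign bit, zero-extension clears the high
    // bits, "any" leaves them unspecified (this sketch simply passes the raw
    // bits through for that case).
    uint32_t widenI8(uint8_t V, ExtendKind K) {
      switch (K) {
      case ExtendKind::Sign:
        return static_cast<uint32_t>(static_cast<int32_t>(static_cast<int8_t>(V)));
      case ExtendKind::Zero:
        return static_cast<uint32_t>(V);
      case ExtendKind::Any:
        return static_cast<uint32_t>(V);   // high bits are unconstrained
      }
      return 0;
    }

    int main() {
      assert(widenI8(0x80, ExtendKind::Sign) == 0xFFFFFF80u);
      assert(widenI8(0x80, ExtendKind::Zero) == 0x00000080u);
    }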
if (!I.getType()->isVectorTy() && Op2.getValueType() != ShiftTy) { @@ -3137,12 +3146,12 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { OI != E; ++OI) { const Value *Idx = *OI; if (StructType *StTy = dyn_cast(Ty)) { - unsigned Field = cast(Idx)->getZExtValue(); + unsigned Field = cast(Idx)->getUniqueInteger().getZExtValue(); if (Field) { // N = N + Offset uint64_t Offset = TD->getStructLayout(StTy)->getElementOffset(Field); N = DAG.getNode(ISD::ADD, getCurDebugLoc(), N.getValueType(), N, - DAG.getIntPtrConstant(Offset)); + DAG.getConstant(Offset, N.getValueType())); } Ty = StTy->getElementType(Field); @@ -3187,7 +3196,7 @@ void SelectionDAGBuilder::visitGetElementPtr(const User &I) { N.getValueType(), IdxN, DAG.getConstant(Amt, IdxN.getValueType())); } else { - SDValue Scale = DAG.getConstant(ElementSize, TLI.getPointerTy()); + SDValue Scale = DAG.getConstant(ElementSize, IdxN.getValueType()); IdxN = DAG.getNode(ISD::MUL, getCurDebugLoc(), N.getValueType(), IdxN, Scale); } @@ -3510,7 +3519,7 @@ void SelectionDAGBuilder::visitAtomicLoad(const LoadInst &I) { EVT VT = TLI.getValueType(I.getType()); - if (I.getAlignment() * 8 < VT.getSizeInBits()) + if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic load"); SDValue L = @@ -3540,7 +3549,7 @@ void SelectionDAGBuilder::visitAtomicStore(const StoreInst &I) { EVT VT = TLI.getValueType(I.getValueOperand()->getType()); - if (I.getAlignment() * 8 < VT.getSizeInBits()) + if (I.getAlignment() < VT.getSizeInBits() / 8) report_fatal_error("Cannot generate unaligned atomic store"); if (TLI.getInsertFencesForAtomic()) @@ -3654,7 +3663,7 @@ void SelectionDAGBuilder::visitTargetIntrinsic(const CallInst &I, /// /// Op = (Op & 0x007fffff) | 0x3f800000; /// -/// where Op is the hexidecimal representation of floating point value. +/// where Op is the hexadecimal representation of floating point value. static SDValue GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { SDValue t1 = DAG.getNode(ISD::AND, dl, MVT::i32, Op, @@ -3668,7 +3677,7 @@ GetSignificand(SelectionDAG &DAG, SDValue Op, DebugLoc dl) { /// /// (float)(int)(((Op & 0x7f800000) >> 23) - 127); /// -/// where Op is the hexidecimal representation of floating point value. +/// where Op is the hexadecimal representation of floating point value. static SDValue GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, DebugLoc dl) { @@ -3684,19 +3693,16 @@ GetExponent(SelectionDAG &DAG, SDValue Op, const TargetLowering &TLI, /// getF32Constant - Get 32-bit floating point constant. static SDValue getF32Constant(SelectionDAG &DAG, unsigned Flt) { - return DAG.getConstantFP(APFloat(APInt(32, Flt)), MVT::f32); + return DAG.getConstantFP(APFloat(APFloat::IEEEsingle, APInt(32, Flt)), + MVT::f32); } -/// visitExp - Lower an exp intrinsic. Handles the special sequences for +/// expandExp - Lower an exp intrinsic. Handles the special sequences for /// limited-precision mode. 
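The three helpers above (GetSignificand, GetExponent, getF32Constant) operate directly on the IEEE-754 bit pattern of a float. A scalar C++ sketch of the same tricks, assuming a positive, normal single-precision input; the helper names here are illustrative, not from the patch:

#include <cstdint>
#include <cstring>

// Reinterpret a raw 32-bit pattern as a float, as getF32Constant does through
// APFloat(APFloat::IEEEsingle, APInt(32, Flt)).
static float f32FromBits(uint32_t Bits) {
  float F;
  std::memcpy(&F, &Bits, sizeof F);
  return F;
}

static uint32_t bitsFromF32(float F) {
  uint32_t Bits;
  std::memcpy(&Bits, &F, sizeof Bits);
  return Bits;
}

// GetSignificand: keep the 23 mantissa bits and force the exponent field to
// 127 (0x3f800000), producing a value in [1.0, 2.0).
static float significand(float X) {
  return f32FromBits((bitsFromF32(X) & 0x007fffffu) | 0x3f800000u);
}

// GetExponent: isolate the biased exponent field, shift it down and remove
// the bias of 127.
static float exponent(float X) {
  return (float)((int32_t)((bitsFromF32(X) & 0x7f800000u) >> 23) - 127);
}

For example, significand(6.0f) is 1.5f and exponent(6.0f) is 2.0f, so X equals significand(X) times two to the exponent(X) for every positive normal X; the limited-precision expansions below lean on exactly this decomposition.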
-void -SelectionDAGBuilder::visitExp(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandExp(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); // Put the exponent in the right bit position for later addition to the // final result: @@ -3715,6 +3721,7 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); + SDValue TwoToFracPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3728,16 +3735,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t5); - - // Add the exponent into the result in integer domain. - SDValue t6 = DAG.getNode(ISD::ADD, dl, MVT::i32, - TwoToFracPartOfX, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t6); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = @@ -3754,16 +3754,9 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl,MVT::i32, t7); - - // Add the exponent into the result in integer domain. - SDValue t8 = DAG.getNode(ISD::ADD, dl, MVT::i32, - TwoToFracPartOfX, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t8); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = @@ -3792,37 +3785,27 @@ SelectionDAGBuilder::visitExp(const CallInst &I) { SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - SDValue TwoToFracPartOfX = DAG.getNode(ISD::BITCAST, dl, - MVT::i32, t13); - - // Add the exponent into the result in integer domain. - SDValue t14 = DAG.getNode(ISD::ADD, dl, MVT::i32, - TwoToFracPartOfX, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, MVT::f32, t14); + TwoToFracPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FEXP, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + // Add the exponent into the result in integer domain. 
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, TwoToFracPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, + t13, IntegerPartOfX)); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FEXP, dl, Op.getValueType(), Op); } -/// visitLog - Lower a log intrinsic. Handles the special sequences for +/// expandLog - Lower a log intrinsic. Handles the special sequences for /// limited-precision mode. -void -SelectionDAGBuilder::visitLog(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandLog(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log(2) [0.69314718f]. @@ -3834,6 +3817,7 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); + SDValue LogOfMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3847,12 +3831,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3fb3a2b1)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); - SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f949a29)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, LogOfMantissa); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f949a29)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // LogOfMantissa = @@ -3873,12 +3854,9 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40348e95)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3fdef31a)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, LogOfMantissa); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3fdef31a)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // LogOfMantissa = @@ -3907,32 +3885,23 @@ SelectionDAGBuilder::visitLog(const CallInst &I) { SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x408797cb)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4006dcab)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, LogOfMantissa); + LogOfMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4006dcab)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FLOG, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, LogOfMantissa); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FLOG, dl, Op.getValueType(), Op); } -/// visitLog2 - Lower a log2 intrinsic. Handles the special sequences for +/// expandLog2 - Lower a log2 intrinsic. 
Handles the special sequences for /// limited-precision mode. -void -SelectionDAGBuilder::visitLog2(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandLog2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Get the exponent. @@ -3944,6 +3913,7 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { // Different possible minimax approximations of significand in // floating-point for various degrees of accuracy over [1,2]. + SDValue Log2ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -3955,12 +3925,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x40019463)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); - SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3fd6633d)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log2ofMantissa); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3fd6633d)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log2ofMantissa = @@ -3981,12 +3948,9 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x40823e2f)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, - getF32Constant(DAG, 0x4020d29c)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log2ofMantissa); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t6, + getF32Constant(DAG, 0x4020d29c)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log2ofMantissa = @@ -4016,32 +3980,23 @@ SelectionDAGBuilder::visitLog2(const CallInst &I) { SDValue t9 = DAG.getNode(ISD::FADD, dl, MVT::f32, t8, getF32Constant(DAG, 0x40c39dad)); SDValue t10 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t9, X); - SDValue Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, - getF32Constant(DAG, 0x4042902c)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log2ofMantissa); + Log2ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t10, + getF32Constant(DAG, 0x4042902c)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FLOG2, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log2ofMantissa); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FLOG2, dl, Op.getValueType(), Op); } -/// visitLog10 - Lower a log10 intrinsic. Handles the special sequences for +/// expandLog10 - Lower a log10 intrinsic. Handles the special sequences for /// limited-precision mode. 
-void -SelectionDAGBuilder::visitLog10(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandLog10(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, Op); // Scale the exponent by log10(2) [0.30102999f]. @@ -4053,6 +4008,7 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { // exponent of 1. SDValue X = GetSignificand(DAG, Op1, dl); + SDValue Log10ofMantissa; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -4066,12 +4022,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue t1 = DAG.getNode(ISD::FADD, dl, MVT::f32, t0, getF32Constant(DAG, 0x3f1c0789)); SDValue t2 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t1, X); - SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, - getF32Constant(DAG, 0x3f011300)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log10ofMantissa); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t2, + getF32Constant(DAG, 0x3f011300)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // Log10ofMantissa = @@ -4088,12 +4041,9 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f6ae232)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f25f7c3)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log10ofMantissa); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f25f7c3)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // Log10ofMantissa = @@ -4118,33 +4068,23 @@ SelectionDAGBuilder::visitLog10(const CallInst &I) { SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, getF32Constant(DAG, 0x3fc4316c)); SDValue t8 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t7, X); - SDValue Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, - getF32Constant(DAG, 0x3f57ce70)); - - result = DAG.getNode(ISD::FADD, dl, - MVT::f32, LogOfExponent, Log10ofMantissa); + Log10ofMantissa = DAG.getNode(ISD::FSUB, dl, MVT::f32, t8, + getF32Constant(DAG, 0x3f57ce70)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FLOG10, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + return DAG.getNode(ISD::FADD, dl, MVT::f32, LogOfExponent, Log10ofMantissa); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FLOG10, dl, Op.getValueType(), Op); } -/// visitExp2 - Lower an exp2 intrinsic. Handles the special sequences for +/// expandExp2 - Lower an exp2 intrinsic. Handles the special sequences for /// limited-precision mode. 
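Each of the expansions above follows the same scalar recipe: split the input into an integer power of two plus a leftover in a narrow range, evaluate a short polynomial on the leftover, and recombine. A minimal C++ sketch of the two degree-2 ("precision <= 6") variants, reusing the f32FromBits/bitsFromF32/significand/exponent helpers sketched earlier. The hex coefficients are the ones visible in this patch; the log2(e) scaling at the top of expandExp sits outside the quoted hunks and is an assumption here:

// expandExp, precision <= 6: exp(x) = 2^(x*log2(e)). Split x*log2(e) into an
// integer part n and a fractional part f, approximate 2^f with a quadratic,
// then splice n into the exponent field in the integer domain.
static float expLimited6(float X) {
  float T0 = X * 1.4426950f;                 // x * log2(e) (assumed constant)
  int32_t N = (int32_t)T0;                   // FP_TO_SINT
  float F = T0 - (float)N;                   // fractional part

  float C2 = f32FromBits(0x3e814304);
  float C1 = f32FromBits(0x3f3c50c8);
  float C0 = f32FromBits(0x3f7f5e7e);
  float TwoToF = (C2 * F + C1) * F + C0;     // approximates 2^f, close to [1, 2)

  // "Add the exponent into the result in integer domain": adding N << 23 to
  // the bit pattern of a value near [1, 2) multiplies it by 2^N.
  return f32FromBits(bitsFromF32(TwoToF) + ((uint32_t)N << 23));
}

// expandLog, precision <= 6: log(x) = e*ln(2) + log(m), with x = m * 2^e.
static float logLimited6(float X) {          // X positive and normal
  float LogOfExponent = exponent(X) * 0.69314718f;
  float M = significand(X);                  // in [1.0, 2.0)

  float C2 = f32FromBits(0xbe74c456);
  float C1 = f32FromBits(0x3fb3a2b1);
  float C0 = f32FromBits(0x3f949a29);
  float LogOfMantissa = (C2 * M + C1) * M - C0;   // approximates ln(m) on [1, 2)

  return LogOfExponent + LogOfMantissa;
}

expLimited6(1.0f) lands within about 1% of e, which is all the 6-bit variant promises; the 12- and 18-bit branches only swap in longer polynomials. expandLog2 and expandLog10 differ from expandLog only in how the exponent is scaled (by 1.0 or by log10(2)) and in the mantissa polynomial, and expandExp2 plus the pow(10, x) case below reuse the same 2^f tail with a different initial scaling.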
-void -SelectionDAGBuilder::visitExp2(const CallInst &I) { - SDValue result; - DebugLoc dl = getCurDebugLoc(); - - if (getValue(I.getArgOperand(0)).getValueType() == MVT::f32 && +static SDValue expandExp2(DebugLoc dl, SDValue Op, SelectionDAG &DAG, + const TargetLowering &TLI) { + if (Op.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(0)); - SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, Op); // FractionalPartOfX = x - (float)IntegerPartOfX; @@ -4155,6 +4095,7 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); + SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -4168,15 +4109,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = @@ -4193,15 +4128,9 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = @@ -4229,54 +4158,42 @@ SelectionDAGBuilder::visitExp2(const CallInst &I) { SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FEXP2, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0))); + + // Add the exponent into the result in integer domain. 
+ SDValue t13 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, + TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, + t13, IntegerPartOfX)); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FEXP2, dl, Op.getValueType(), Op); } /// visitPow - Lower a pow intrinsic. Handles the special sequences for /// limited-precision mode with x == 10.0f. -void -SelectionDAGBuilder::visitPow(const CallInst &I) { - SDValue result; - const Value *Val = I.getArgOperand(0); - DebugLoc dl = getCurDebugLoc(); +static SDValue expandPow(DebugLoc dl, SDValue LHS, SDValue RHS, + SelectionDAG &DAG, const TargetLowering &TLI) { bool IsExp10 = false; - - if (getValue(Val).getValueType() == MVT::f32 && - getValue(I.getArgOperand(1)).getValueType() == MVT::f32 && + if (LHS.getValueType() == MVT::f32 && LHS.getValueType() == MVT::f32 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - if (Constant *C = const_cast(dyn_cast(Val))) { - if (ConstantFP *CFP = dyn_cast(C)) { - APFloat Ten(10.0f); - IsExp10 = CFP->getValueAPF().bitwiseIsEqual(Ten); - } + if (ConstantFPSDNode *LHSC = dyn_cast(LHS)) { + APFloat Ten(10.0f); + IsExp10 = LHSC->isExactlyValue(Ten); } } - if (IsExp10 && LimitFloatPrecision > 0 && LimitFloatPrecision <= 18) { - SDValue Op = getValue(I.getArgOperand(1)); - + if (IsExp10) { // Put the exponent in the right bit position for later addition to the // final result: // // #define LOG2OF10 3.3219281f // IntegerPartOfX = (int32_t)(x * LOG2OF10); - SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, Op, + SDValue t0 = DAG.getNode(ISD::FMUL, dl, MVT::f32, RHS, getF32Constant(DAG, 0x40549a78)); SDValue IntegerPartOfX = DAG.getNode(ISD::FP_TO_SINT, dl, MVT::i32, t0); @@ -4288,6 +4205,7 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { IntegerPartOfX = DAG.getNode(ISD::SHL, dl, MVT::i32, IntegerPartOfX, DAG.getConstant(23, TLI.getPointerTy())); + SDValue TwoToFractionalPartOfX; if (LimitFloatPrecision <= 6) { // For floating-point precision of 6: // @@ -4301,15 +4219,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t3 = DAG.getNode(ISD::FADD, dl, MVT::f32, t2, getF32Constant(DAG, 0x3f3c50c8)); SDValue t4 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t3, X); - SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, - getF32Constant(DAG, 0x3f7f5e7e)); - SDValue t6 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t5); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t6, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else if (LimitFloatPrecision > 6 && LimitFloatPrecision <= 12) { + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, + getF32Constant(DAG, 0x3f7f5e7e)); + } else if (LimitFloatPrecision <= 12) { // For floating-point precision of 12: // // TwoToFractionalPartOfX = @@ -4326,15 +4238,9 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t5 = DAG.getNode(ISD::FADD, dl, MVT::f32, t4, getF32Constant(DAG, 0x3f324b07)); SDValue t6 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t5, X); - SDValue t7 = DAG.getNode(ISD::FADD, dl, MVT::f32, t6, - getF32Constant(DAG, 0x3f7ff8fd)); - SDValue t8 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t7); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t8, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); - } else { // LimitFloatPrecision > 12 && LimitFloatPrecision <= 18 + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, 
MVT::f32, t6, + getF32Constant(DAG, 0x3f7ff8fd)); + } else { // LimitFloatPrecision <= 18 // For floating-point precision of 18: // // TwoToFractionalPartOfX = @@ -4362,24 +4268,18 @@ SelectionDAGBuilder::visitPow(const CallInst &I) { SDValue t11 = DAG.getNode(ISD::FADD, dl, MVT::f32, t10, getF32Constant(DAG, 0x3f317234)); SDValue t12 = DAG.getNode(ISD::FMUL, dl, MVT::f32, t11, X); - SDValue t13 = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, - getF32Constant(DAG, 0x3f800000)); - SDValue t14 = DAG.getNode(ISD::BITCAST, dl, MVT::i32, t13); - SDValue TwoToFractionalPartOfX = - DAG.getNode(ISD::ADD, dl, MVT::i32, t14, IntegerPartOfX); - - result = DAG.getNode(ISD::BITCAST, dl, - MVT::f32, TwoToFractionalPartOfX); + TwoToFractionalPartOfX = DAG.getNode(ISD::FADD, dl, MVT::f32, t12, + getF32Constant(DAG, 0x3f800000)); } - } else { - // No special expansion. - result = DAG.getNode(ISD::FPOW, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)), - getValue(I.getArgOperand(1))); + + SDValue t13 = DAG.getNode(ISD::BITCAST, dl,MVT::i32,TwoToFractionalPartOfX); + return DAG.getNode(ISD::BITCAST, dl, MVT::f32, + DAG.getNode(ISD::ADD, dl, MVT::i32, + t13, IntegerPartOfX)); } - setValue(&I, result); + // No special expansion. + return DAG.getNode(ISD::FPOW, dl, LHS.getValueType(), LHS, RHS); } @@ -4400,7 +4300,8 @@ static SDValue ExpandPowI(DebugLoc DL, SDValue LHS, SDValue RHS, return DAG.getConstantFP(1.0, LHS.getValueType()); const Function *F = DAG.getMachineFunction().getFunction(); - if (!F->getFnAttributes().hasAttribute(Attributes::OptimizeForSize) || + if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::OptimizeForSize) || // If optimizing for size, don't insert too many multiplies. This // inserts up to 5 multiplies. CountPopulation_32(Val)+Log2_32(Val) < 7) { @@ -4566,6 +4467,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memcpy defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemcpy(getRoot(), dl, Op1, Op2, Op3, Align, isVol, false, MachinePointerInfo(I.getArgOperand(0)), @@ -4582,6 +4485,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memset defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemset(getRoot(), dl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)))); @@ -4599,6 +4504,8 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDValue Op3 = getValue(I.getArgOperand(2)); unsigned Align = cast(I.getArgOperand(3))->getZExtValue(); + if (!Align) + Align = 1; // @llvm.memmove defines 0 and 1 to both mean no alignment. bool isVol = cast(I.getArgOperand(4))->getZExtValue(); DAG.setRoot(DAG.getMemmove(getRoot(), dl, Op1, Op2, Op3, Align, isVol, MachinePointerInfo(I.getArgOperand(0)), @@ -4873,7 +4780,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { // the sse2/mmx shift instructions reads 64 bits. Set the upper 32 bits // to be zero. 
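The size heuristic in ExpandPowI above has a simple closed form behind it: expanding x^n by binary decomposition of n costs floor(log2(n)) squarings plus popcount(n) - 1 extra multiplies, so admitting only popcount(n) + log2(n) < 7 keeps the expansion to at most 5 multiplies. A standalone C++ sketch that counts them; this is illustrative only, since the real routine emits ISD::FMUL nodes and handles the n == 0 and negative-exponent cases separately:

#include <cstdint>

// Count the multiplies a binary-decomposition expansion of x^n would emit.
static float powiExpand(float X, uint32_t N, unsigned &MulCount) {
  MulCount = 0;
  float Result = 1.0f;
  float Base = X;
  bool HaveResult = false;
  while (N != 0) {
    if (N & 1) {                 // this bit folds the current square of x in
      if (!HaveResult) {
        Result = Base;           // the first factor is free
        HaveResult = true;
      } else {
        Result *= Base;
        ++MulCount;
      }
    }
    N >>= 1;
    if (N != 0) {                // one squaring per remaining bit
      Base *= Base;
      ++MulCount;
    }
  }
  return Result;
}

For n = 12 (0b1100) this performs 4 multiplies (3 squarings plus one fold), and popcount(12) + log2(12) = 2 + 3 passes the < 7 test; n = 25 needs 6 multiplies and 3 + 4 = 7 fails it, which is exactly the cut the OptimizeForSize check enforces.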
// We must do this early because v2i32 is not a legal type. - DebugLoc dl = getCurDebugLoc(); SDValue ShOps[2]; ShOps[0] = ShAmt; ShOps[1] = DAG.getConstant(0, MVT::i32); @@ -4890,7 +4796,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vinsertf128_ps_256: case Intrinsic::x86_avx_vinsertf128_si_256: case Intrinsic::x86_avx2_vinserti128: { - DebugLoc dl = getCurDebugLoc(); EVT DestVT = TLI.getValueType(I.getType()); EVT ElVT = TLI.getValueType(I.getArgOperand(1)->getType()); uint64_t Idx = (cast(I.getArgOperand(2))->getZExtValue() & 1) * @@ -4906,7 +4811,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::x86_avx_vextractf128_ps_256: case Intrinsic::x86_avx_vextractf128_si_256: case Intrinsic::x86_avx2_vextracti128: { - DebugLoc dl = getCurDebugLoc(); EVT DestVT = TLI.getValueType(I.getType()); uint64_t Idx = (cast(I.getArgOperand(1))->getZExtValue() & 1) * DestVT.getVectorNumElements(); @@ -4940,7 +4844,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { } EVT DestVT = TLI.getValueType(I.getType()); const Value *Op1 = I.getArgOperand(0); - Res = DAG.getConvertRndSat(DestVT, getCurDebugLoc(), getValue(Op1), + Res = DAG.getConvertRndSat(DestVT, dl, getValue(Op1), DAG.getValueType(DestVT), DAG.getValueType(getValue(Op1).getValueType()), getValue(I.getArgOperand(1)), @@ -4949,53 +4853,57 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { setValue(&I, Res); return 0; } - case Intrinsic::sqrt: - setValue(&I, DAG.getNode(ISD::FSQRT, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; case Intrinsic::powi: setValue(&I, ExpandPowI(dl, getValue(I.getArgOperand(0)), getValue(I.getArgOperand(1)), DAG)); return 0; - case Intrinsic::sin: - setValue(&I, DAG.getNode(ISD::FSIN, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; - case Intrinsic::cos: - setValue(&I, DAG.getNode(ISD::FCOS, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; case Intrinsic::log: - visitLog(I); + setValue(&I, expandLog(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::log2: - visitLog2(I); + setValue(&I, expandLog2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::log10: - visitLog10(I); + setValue(&I, expandLog10(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::exp: - visitExp(I); + setValue(&I, expandExp(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::exp2: - visitExp2(I); + setValue(&I, expandExp2(dl, getValue(I.getArgOperand(0)), DAG, TLI)); return 0; case Intrinsic::pow: - visitPow(I); + setValue(&I, expandPow(dl, getValue(I.getArgOperand(0)), + getValue(I.getArgOperand(1)), DAG, TLI)); return 0; + case Intrinsic::sqrt: case Intrinsic::fabs: - setValue(&I, DAG.getNode(ISD::FABS, dl, - getValue(I.getArgOperand(0)).getValueType(), - getValue(I.getArgOperand(0)))); - return 0; + case Intrinsic::sin: + case Intrinsic::cos: case Intrinsic::floor: - setValue(&I, DAG.getNode(ISD::FFLOOR, dl, + case Intrinsic::ceil: + case Intrinsic::trunc: + case Intrinsic::rint: + case Intrinsic::nearbyint: { + unsigned Opcode; + switch (Intrinsic) { + default: llvm_unreachable("Impossible intrinsic"); // Can't reach here. 
+ case Intrinsic::sqrt: Opcode = ISD::FSQRT; break; + case Intrinsic::fabs: Opcode = ISD::FABS; break; + case Intrinsic::sin: Opcode = ISD::FSIN; break; + case Intrinsic::cos: Opcode = ISD::FCOS; break; + case Intrinsic::floor: Opcode = ISD::FFLOOR; break; + case Intrinsic::ceil: Opcode = ISD::FCEIL; break; + case Intrinsic::trunc: Opcode = ISD::FTRUNC; break; + case Intrinsic::rint: Opcode = ISD::FRINT; break; + case Intrinsic::nearbyint: Opcode = ISD::FNEARBYINT; break; + } + + setValue(&I, DAG.getNode(Opcode, dl, getValue(I.getArgOperand(0)).getValueType(), getValue(I.getArgOperand(0)))); return 0; + } case Intrinsic::fma: setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5006,7 +4914,6 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { case Intrinsic::fmuladd: { EVT VT = TLI.getValueType(I.getType()); if (TM.Options.AllowFPOpFusion != FPOpFusion::Strict && - TLI.isOperationLegal(ISD::FMA, VT) && TLI.isFMAFasterThanMulAndAdd(VT)){ setValue(&I, DAG.getNode(ISD::FMA, dl, getValue(I.getArgOperand(0)).getValueType(), @@ -5103,7 +5010,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue FIN = DAG.getFrameIndex(FI, PtrTy); // Store the stack protector onto the stack. - Res = DAG.getStore(getRoot(), getCurDebugLoc(), Src, FIN, + Res = DAG.getStore(getRoot(), dl, Src, FIN, MachinePointerInfo::getFixedStack(FI), true, false, 0); setValue(&I, Res); @@ -5191,7 +5098,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { /*isTailCall=*/false, /*doesNotRet=*/false, /*isReturnValueUsed=*/true, DAG.getExternalSymbol(TrapFuncName.data(), TLI.getPointerTy()), - Args, DAG, getCurDebugLoc()); + Args, DAG, dl); std::pair Result = TLI.LowerCallTo(CLI); DAG.setRoot(Result.second); return 0; @@ -5217,7 +5124,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { SDValue Op2 = getValue(I.getArgOperand(1)); SDVTList VTs = DAG.getVTList(Op1.getValueType(), MVT::i1); - setValue(&I, DAG.getNode(Op, getCurDebugLoc(), VTs, Op1, Op2)); + setValue(&I, DAG.getNode(Op, dl, VTs, Op1, Op2)); return 0; } case Intrinsic::prefetch: { @@ -5267,6 +5174,7 @@ SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I, unsigned Intrinsic) { Res = DAG.getNode(Opcode, dl, MVT::Other, Ops, 2); DAG.setRoot(Res); } + return 0; } case Intrinsic::invariant_start: // Discard region information. @@ -5296,8 +5204,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check whether the function can return without sret-demotion. 
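The fmuladd change above gates contraction on whether fusion is allowed and profitable; the intrinsic itself permits either a single-rounding fused result or an ordinary multiply followed by an add, which is what makes the choice legal either way. A plain C++ sketch of the two results being chosen between; the unfused fallback branch is outside the quoted hunk:

#include <cmath>

// Fused: one rounding, what lowering to ISD::FMA produces.
double fmuladdFused(double A, double B, double C) {
  return std::fma(A, B, C);
}

// Unfused: two roundings, what separate ISD::FMUL and ISD::FADD produce.
double fmuladdUnfused(double A, double B, double C) {
  return A * B + C;
}

The two can round differently (most visibly when A * B nearly cancels against C), and @llvm.fmuladd declares both acceptable, so the backend only has to decide which form is cheaper under the current FPOpFusion setting.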
SmallVector Outs; - GetReturnInfo(RetTy, CS.getAttributes().getRetAttributes(), - Outs, TLI); + GetReturnInfo(RetTy, CS.getAttributes(), Outs, TLI); bool CanLowerReturn = TLI.CanLowerReturn(CS.getCallingConv(), DAG.getMachineFunction(), @@ -5342,12 +5249,12 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, Entry.Node = ArgNode; Entry.Ty = V->getType(); unsigned attrInd = i - CS.arg_begin() + 1; - Entry.isSExt = CS.paramHasAttr(attrInd, Attributes::SExt); - Entry.isZExt = CS.paramHasAttr(attrInd, Attributes::ZExt); - Entry.isInReg = CS.paramHasAttr(attrInd, Attributes::InReg); - Entry.isSRet = CS.paramHasAttr(attrInd, Attributes::StructRet); - Entry.isNest = CS.paramHasAttr(attrInd, Attributes::Nest); - Entry.isByVal = CS.paramHasAttr(attrInd, Attributes::ByVal); + Entry.isSExt = CS.paramHasAttr(attrInd, Attribute::SExt); + Entry.isZExt = CS.paramHasAttr(attrInd, Attribute::ZExt); + Entry.isInReg = CS.paramHasAttr(attrInd, Attribute::InReg); + Entry.isSRet = CS.paramHasAttr(attrInd, Attribute::StructRet); + Entry.isNest = CS.paramHasAttr(attrInd, Attribute::Nest); + Entry.isByVal = CS.paramHasAttr(attrInd, Attribute::ByVal); Entry.Alignment = CS.getParamAlignment(attrInd); Args.push_back(Entry); } @@ -5376,13 +5283,7 @@ void SelectionDAGBuilder::LowerCallTo(ImmutableCallSite CS, SDValue Callee, // Check if target-independent constraints permit a tail call here. // Target-dependent constraints are checked within TLI.LowerCallTo. - if (isTailCall && - !isInTailCallPosition(CS, CS.getAttributes().getRetAttributes(), TLI)) - isTailCall = false; - - // If there's a possibility that fast-isel has already selected some amount - // of the current basic block, don't emit a tail call. - if (isTailCall && TM.Options.EnableFastISel) + if (isTailCall && !isInTailCallPosition(CS, TLI)) isTailCall = false; TargetLowering:: @@ -5856,7 +5757,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // Try to convert to the first EVT that the reg class contains. If the // types are identical size, use a bitcast to convert (e.g. two differing // vector types). - EVT RegVT = *PhysReg.second->vt_begin(); + MVT RegVT = *PhysReg.second->vt_begin(); if (RegVT.getSizeInBits() == OpInfo.ConstraintVT.getSizeInBits()) { OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); @@ -5866,8 +5767,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, // bitcast to the corresponding integer type. This turns an f64 value // into i64, which can be passed with two i32 values on a 32-bit // machine. - RegVT = EVT::getIntegerVT(Context, - OpInfo.ConstraintVT.getSizeInBits()); + RegVT = MVT::getIntegerVT(OpInfo.ConstraintVT.getSizeInBits()); OpInfo.CallOperand = DAG.getNode(ISD::BITCAST, DL, RegVT, OpInfo.CallOperand); OpInfo.ConstraintVT = RegVT; @@ -5877,7 +5777,7 @@ static void GetRegistersForValue(SelectionDAG &DAG, NumRegs = TLI.getNumRegisters(Context, OpInfo.ConstraintVT); } - EVT RegVT; + MVT RegVT; EVT ValueVT = OpInfo.ConstraintVT; // If this is a constraint for a specific physical register, like {r17}, @@ -5951,7 +5851,7 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ConstraintOperands.push_back(SDISelAsmOperandInfo(TargetConstraints[i])); SDISelAsmOperandInfo &OpInfo = ConstraintOperands.back(); - EVT OpVT = MVT::Other; + MVT OpVT = MVT::Other; // Compute the value type for each operand. switch (OpInfo.Type) { @@ -5966,10 +5866,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // corresponding argument. 
assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(CS.getType())) { - OpVT = TLI.getValueType(STy->getElementType(ResNo)); + OpVT = TLI.getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpVT = TLI.getValueType(CS.getType()); + OpVT = TLI.getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -5990,7 +5890,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { OpInfo.CallOperand = getValue(OpInfo.CallOperandVal); } - OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD); + OpVT = OpInfo.getCallOperandValEVT(*DAG.getContext(), TLI, TD). + getSimpleVT(); } OpInfo.ConstraintVT = OpVT; @@ -6052,6 +5953,10 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { // Compute the constraint code and ConstraintType to use. TLI.ComputeConstraintToUse(OpInfo, OpInfo.CallOperand, &DAG); + if (OpInfo.ConstraintType == TargetLowering::C_Memory && + OpInfo.Type == InlineAsm::isClobber) + continue; + // If this is a memory input, and if the operand is not indirect, do what we // need to to provide an address for the memory input. if (OpInfo.ConstraintType == TargetLowering::C_Memory && @@ -6155,6 +6060,8 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { ExtraInfo |= InlineAsm::Extra_MayLoad; else if (OpInfo.Type == InlineAsm::isOutput) ExtraInfo |= InlineAsm::Extra_MayStore; + else if (OpInfo.Type == InlineAsm::isClobber) + ExtraInfo |= (InlineAsm::Extra_MayLoad | InlineAsm::Extra_MayStore); } } @@ -6253,11 +6160,12 @@ void SelectionDAGBuilder::visitInlineAsm(ImmutableCallSite CS) { Ctx.emitError(CS.getInstruction(), "inline asm not supported yet:" " don't know how to handle tied " "indirect register inputs"); + report_fatal_error("Cannot handle indirect register inputs!"); } RegsForValue MatchedRegs; MatchedRegs.ValueVTs.push_back(InOperandVal.getValueType()); - EVT RegVT = AsmNodeOperands[CurOp+1].getValueType(); + MVT RegVT = AsmNodeOperands[CurOp+1].getSimpleValueType(); MatchedRegs.RegVTs.push_back(RegVT); MachineRegisterInfo &RegInfo = DAG.getMachineFunction().getRegInfo(); for (unsigned i = 0, e = InlineAsm::getNumOperandRegisters(OpFlag); @@ -6524,7 +6432,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - EVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); + MVT PartVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumParts = getNumRegisters(CLI.RetTy->getContext(), VT); SmallVector Parts(NumParts); ISD::NodeType ExtendKind = ISD::ANY_EXTEND; @@ -6559,11 +6467,11 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { ComputeValueVTs(*this, CLI.RetTy, RetTys); for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + MVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags; - MyFlags.VT = RegisterVT.getSimpleVT(); + MyFlags.VT = RegisterVT; MyFlags.Used = CLI.IsReturnValueUsed; if (CLI.RetSExt) MyFlags.Flags.setSExt(); @@ -6613,7 +6521,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const { unsigned CurReg = 0; for (unsigned I = 0, E = RetTys.size(); I != E; ++I) { EVT VT = RetTys[I]; - EVT RegisterVT = getRegisterType(CLI.RetTy->getContext(), VT); + MVT RegisterVT = 
getRegisterType(CLI.RetTy->getContext(), VT); unsigned NumRegs = getNumRegisters(CLI.RetTy->getContext(), VT); ReturnValues.push_back(getCopyFromParts(CLI.DAG, CLI.DL, &InVals[CurReg], @@ -6681,19 +6589,12 @@ static bool isOnlyUsedInEntryBlock(const Argument *A, bool FastISel) { return true; } -void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { - // If this is the entry block, emit arguments. - const Function &F = *LLVMBB->getParent(); +void SelectionDAGISel::LowerArguments(const Function &F) { SelectionDAG &DAG = SDB->DAG; DebugLoc dl = SDB->getCurDebugLoc(); const DataLayout *TD = TLI.getDataLayout(); SmallVector Ins; - // Check whether the function can return without sret-demotion. - SmallVector Outs; - GetReturnInfo(F.getReturnType(), F.getAttributes().getRetAttributes(), - Outs, TLI); - if (!FuncInfo->CanLowerReturn) { // Put in an sret pointer parameter before all the other parameters. SmallVector ValueVTs; @@ -6703,7 +6604,7 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // or one register. ISD::ArgFlagsTy Flags; Flags.setSRet(); - EVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); + MVT RegisterVT = TLI.getRegisterType(*DAG.getContext(), ValueVTs[0]); ISD::InputArg RetArg(Flags, RegisterVT, true, 0, 0); Ins.push_back(RetArg); } @@ -6723,15 +6624,15 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { unsigned OriginalAlignment = TD->getABITypeAlignment(ArgTy); - if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt)) + if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) Flags.setZExt(); - if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) Flags.setSExt(); - if (F.getParamAttributes(Idx).hasAttribute(Attributes::InReg)) + if (F.getAttributes().hasAttribute(Idx, Attribute::InReg)) Flags.setInReg(); - if (F.getParamAttributes(Idx).hasAttribute(Attributes::StructRet)) + if (F.getAttributes().hasAttribute(Idx, Attribute::StructRet)) Flags.setSRet(); - if (F.getParamAttributes(Idx).hasAttribute(Attributes::ByVal)) { + if (F.getAttributes().hasAttribute(Idx, Attribute::ByVal)) { Flags.setByVal(); PointerType *Ty = cast(I->getType()); Type *ElementTy = Ty->getElementType(); @@ -6745,11 +6646,11 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { FrameAlign = TLI.getByValTypeAlignment(ElementTy); Flags.setByValAlign(FrameAlign); } - if (F.getParamAttributes(Idx).hasAttribute(Attributes::Nest)) + if (F.getAttributes().hasAttribute(Idx, Attribute::Nest)) Flags.setNest(); Flags.setOrigAlign(OriginalAlignment); - EVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT RegisterVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumRegs = TLI.getNumRegisters(*CurDAG->getContext(), VT); for (unsigned i = 0; i != NumRegs; ++i) { ISD::InputArg MyFlags(Flags, RegisterVT, isArgValueUsed, @@ -6795,8 +6696,8 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { // from the sret argument into it. 
SmallVector ValueVTs; ComputeValueVTs(TLI, PointerType::getUnqual(F.getReturnType()), ValueVTs); - EVT VT = ValueVTs[0]; - EVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT VT = ValueVTs[0].getSimpleVT(); + MVT RegVT = TLI.getRegisterType(*CurDAG->getContext(), VT); ISD::NodeType AssertOp = ISD::DELETED_NODE; SDValue ArgValue = getCopyFromParts(DAG, dl, &InVals[0], 1, RegVT, VT, NULL, AssertOp); @@ -6828,14 +6729,14 @@ void SelectionDAGISel::LowerArguments(const BasicBlock *LLVMBB) { for (unsigned Val = 0; Val != NumValues; ++Val) { EVT VT = ValueVTs[Val]; - EVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); + MVT PartVT = TLI.getRegisterType(*CurDAG->getContext(), VT); unsigned NumParts = TLI.getNumRegisters(*CurDAG->getContext(), VT); if (!I->use_empty()) { ISD::NodeType AssertOp = ISD::DELETED_NODE; - if (F.getParamAttributes(Idx).hasAttribute(Attributes::SExt)) + if (F.getAttributes().hasAttribute(Idx, Attribute::SExt)) AssertOp = ISD::AssertSext; - else if (F.getParamAttributes(Idx).hasAttribute(Attributes::ZExt)) + else if (F.getAttributes().hasAttribute(Idx, Attribute::ZExt)) AssertOp = ISD::AssertZext; ArgValues.push_back(getCopyFromParts(DAG, dl, &InVals[i], diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h index 9e46d9664f96..9188945bd906 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.h @@ -14,12 +14,12 @@ #ifndef SELECTIONDAGBUILDER_H #define SELECTIONDAGBUILDER_H -#include "llvm/Constants.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/SelectionDAGNodes.h" #include "llvm/CodeGen/ValueTypes.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/ErrorHandling.h" #include @@ -262,7 +262,7 @@ private: struct BitTestBlock { BitTestBlock(APInt F, APInt R, const Value* SV, - unsigned Rg, EVT RgVT, bool E, + unsigned Rg, MVT RgVT, bool E, MachineBasicBlock* P, MachineBasicBlock* D, const BitTestInfo& C): First(F), Range(R), SValue(SV), Reg(Rg), RegVT(RgVT), Emitted(E), @@ -271,7 +271,7 @@ private: APInt Range; const Value *SValue; unsigned Reg; - EVT RegVT; + MVT RegVT; bool Emitted; MachineBasicBlock *Parent; MachineBasicBlock *Default; @@ -533,13 +533,6 @@ private: const char *visitIntrinsicCall(const CallInst &I, unsigned Intrinsic); void visitTargetIntrinsic(const CallInst &I, unsigned Intrinsic); - void visitPow(const CallInst &I); - void visitExp2(const CallInst &I); - void visitExp(const CallInst &I); - void visitLog(const CallInst &I); - void visitLog2(const CallInst &I); - void visitLog10(const CallInst &I); - void visitVAStart(const CallInst &I); void visitVAArg(const VAArgInst &I); void visitVAEnd(const CallInst &I); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index 6f3ce7a44bc4..3b5823bfb277 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -11,23 +11,23 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/Intrinsics.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/SelectionDAG.h" #include 
"llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/GraphWriter.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/GraphWriter.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/StringExtras.h" using namespace llvm; std::string SDNode::getOperationName(const SelectionDAG *G) const { @@ -140,6 +140,7 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const { case ISD::FSQRT: return "fsqrt"; case ISD::FSIN: return "fsin"; case ISD::FCOS: return "fcos"; + case ISD::FSINCOS: return "fsincos"; case ISD::FTRUNC: return "ftrunc"; case ISD::FFLOOR: return "ffloor"; case ISD::FCEIL: return "fceil"; diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp index c314fa5b5118..eeea9e4cfcff 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGISel.cpp @@ -12,23 +12,18 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "isel" +#include "llvm/CodeGen/SelectionDAGISel.h" #include "ScheduleDAGSDNodes.h" #include "SelectionDAGBuilder.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/InlineAsm.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" +#include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/Statistic.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/CodeGen/FastISel.h" #include "llvm/CodeGen/FunctionLoweringInfo.h" -#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/GCMetadata.h" +#include "llvm/CodeGen/GCStrategy.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -37,22 +32,29 @@ #include "llvm/CodeGen/ScheduleHazardRecognizer.h" #include "llvm/CodeGen/SchedulerRegistry.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/CodeGen/SelectionDAGISel.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetIntrinsicInfo.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Support/Compiler.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Timer.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetIntrinsicInfo.h" #include "llvm/Target/TargetLibraryInfo.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetOptions.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/Target/TargetSubtargetInfo.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" -#include "llvm/Support/Compiler.h" -#include 
"llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/Timer.h" -#include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/PostOrderIterator.h" -#include "llvm/ADT/Statistic.h" #include using namespace llvm; @@ -142,7 +144,12 @@ EnableFastISelVerbose("fast-isel-verbose", cl::Hidden, "instruction selector")); static cl::opt EnableFastISelAbort("fast-isel-abort", cl::Hidden, - cl::desc("Enable abort calls when \"fast\" instruction fails")); + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower an instruction")); +static cl::opt +EnableFastISelAbortArgs("fast-isel-abort-args", cl::Hidden, + cl::desc("Enable abort calls when \"fast\" instruction selection " + "fails to lower a formal argument")); static cl::opt UseMBPI("use-mbpi", @@ -216,8 +223,9 @@ namespace llvm { ScheduleDAGSDNodes* createDefaultScheduler(SelectionDAGISel *IS, CodeGenOpt::Level OptLevel) { const TargetLowering &TLI = IS->getTargetLowering(); + const TargetSubtargetInfo &ST = IS->TM.getSubtarget(); - if (OptLevel == CodeGenOpt::None || + if (OptLevel == CodeGenOpt::None || ST.enableMachineScheduler() || TLI.getSchedulingPreference() == Sched::Source) return createSourceListDAGScheduler(IS, OptLevel); if (TLI.getSchedulingPreference() == Sched::RegPressure) @@ -348,13 +356,19 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { RegInfo = &MF->getRegInfo(); AA = &getAnalysis(); LibInfo = &getAnalysis(); + TTI = getAnalysisIfAvailable(); GFI = Fn.hasGC() ? &getAnalysis().getFunctionInfo(Fn) : 0; + TargetSubtargetInfo &ST = + const_cast(TM.getSubtarget()); + ST.resetSubtargetFeatures(MF); + TM.resetTargetOptions(MF); + DEBUG(dbgs() << "\n\n\n=== " << Fn.getName() << "\n"); SplitCriticalSideEffectEdges(const_cast(Fn), this); - CurDAG->init(*MF); + CurDAG->init(*MF, TTI); FuncInfo->set(Fn, *MF); if (UseMBPI && OptLevel != CodeGenOpt::None) @@ -364,6 +378,7 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { SDB->init(GFI, *AA, LibInfo); + MF->setHasMSInlineAsm(false); SelectAllBasicBlocks(Fn); // If the first basic block in the function has live ins that need to be @@ -434,24 +449,26 @@ bool SelectionDAGISel::runOnMachineFunction(MachineFunction &mf) { // Determine if there are any calls in this machine function. MachineFrameInfo *MFI = MF->getFrameInfo(); - if (!MFI->hasCalls()) { - for (MachineFunction::const_iterator - I = MF->begin(), E = MF->end(); I != E; ++I) { - const MachineBasicBlock *MBB = I; - for (MachineBasicBlock::const_iterator - II = MBB->begin(), IE = MBB->end(); II != IE; ++II) { - const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); - - if ((MCID.isCall() && !MCID.isReturn()) || - II->isStackAligningInlineAsm()) { - MFI->setHasCalls(true); - goto done; - } + for (MachineFunction::const_iterator I = MF->begin(), E = MF->end(); I != E; + ++I) { + + if (MFI->hasCalls() && MF->hasMSInlineAsm()) + break; + + const MachineBasicBlock *MBB = I; + for (MachineBasicBlock::const_iterator II = MBB->begin(), IE = MBB->end(); + II != IE; ++II) { + const MCInstrDesc &MCID = TM.getInstrInfo()->get(II->getOpcode()); + if ((MCID.isCall() && !MCID.isReturn()) || + II->isStackAligningInlineAsm()) { + MFI->setHasCalls(true); + } + if (II->isMSInlineAsm()) { + MF->setHasMSInlineAsm(true); } } } - done: // Determine if there is a call to setjmp in the machine function. 
MF->setExposesReturnsTwice(Fn.callsFunctionThatReturnsTwice()); @@ -768,8 +785,12 @@ void SelectionDAGISel::DoInstructionSelection() { if (ResNode == Node || Node->getOpcode() == ISD::DELETED_NODE) continue; // Replace node. - if (ResNode) + if (ResNode) { + // Propagate ordering + CurDAG->AssignOrdering(ResNode, CurDAG->GetOrdering(Node)); + ReplaceUses(Node, ResNode); + } // If after the replacement this node is not used any more, // remove this dead node. @@ -1004,34 +1025,28 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { if (AllPredsVisited) { for (BasicBlock::const_iterator I = LLVMBB->begin(); - isa(I); ++I) - FuncInfo->ComputePHILiveOutRegInfo(cast(I)); + const PHINode *PN = dyn_cast(I); ++I) + FuncInfo->ComputePHILiveOutRegInfo(PN); } else { for (BasicBlock::const_iterator I = LLVMBB->begin(); - isa(I); ++I) - FuncInfo->InvalidatePHILiveOutRegInfo(cast(I)); + const PHINode *PN = dyn_cast(I); ++I) + FuncInfo->InvalidatePHILiveOutRegInfo(PN); } FuncInfo->VisitedBBs.insert(LLVMBB); } - FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; - FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); - BasicBlock::const_iterator const Begin = LLVMBB->getFirstNonPHI(); BasicBlock::const_iterator const End = LLVMBB->end(); BasicBlock::const_iterator BI = End; + FuncInfo->MBB = FuncInfo->MBBMap[LLVMBB]; FuncInfo->InsertPt = FuncInfo->MBB->getFirstNonPHI(); // Setup an EH landing-pad block. if (FuncInfo->MBB->isLandingPad()) PrepareEHLandingPad(); - // Lower any arguments needed in this block if this is the entry block. - if (LLVMBB == &Fn.getEntryBlock()) - LowerArguments(LLVMBB); - // Before doing SelectionDAG ISel, see if FastISel has been requested. if (FastIS) { FastIS->startNewBlock(); @@ -1039,9 +1054,18 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { // Emit code for any incoming arguments. This must happen before // beginning FastISel on the entry block. if (LLVMBB == &Fn.getEntryBlock()) { - CurDAG->setRoot(SDB->getControlRoot()); - SDB->clear(); - CodeGenAndEmitDAG(); + // Lower any arguments needed in this block if this is the entry block. + if (!FastIS->LowerArguments()) { + // Fast isel failed to lower these arguments + if (EnableFastISelAbortArgs) + llvm_unreachable("FastISel didn't lower all arguments"); + + // Use SelectionDAG argument lowering + LowerArguments(Fn); + CurDAG->setRoot(SDB->getControlRoot()); + SDB->clear(); + CodeGenAndEmitDAG(); + } // If we inserted any instructions at the beginning, make a note of // where they are, so we can be sure to emit subsequent instructions @@ -1111,19 +1135,21 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } bool HadTailCall = false; + MachineBasicBlock::iterator SavedInsertPt = FuncInfo->InsertPt; SelectBasicBlock(Inst, BI, HadTailCall); - // Recompute NumFastIselRemaining as Selection DAG instruction - // selection may have handled the call, input args, etc. - unsigned RemainingNow = std::distance(Begin, BI); - NumFastIselFailures += NumFastIselRemaining - RemainingNow; - // If the call was emitted as a tail call, we're done with the block. + // We also need to delete any previously emitted instructions. if (HadTailCall) { + FastIS->removeDeadCode(SavedInsertPt, FuncInfo->MBB->end()); --BI; break; } + // Recompute NumFastIselRemaining as Selection DAG instruction + // selection may have handled the call, input args, etc. 
+ unsigned RemainingNow = std::distance(Begin, BI); + NumFastIselFailures += NumFastIselRemaining - RemainingNow; NumFastIselRemaining = RemainingNow; continue; } @@ -1150,6 +1176,10 @@ void SelectionDAGISel::SelectAllBasicBlocks(const Function &Fn) { } FastIS->recomputeInsertPt(); + } else { + // Lower any arguments needed in this block if this is the entry block. + if (LLVMBB == &Fn.getEntryBlock()) + LowerArguments(Fn); } if (Begin != BI) @@ -1189,14 +1219,12 @@ SelectionDAGISel::FinishBasicBlock() { SDB->JTCases.empty() && SDB->BitTestCases.empty()) { for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); if (!FuncInfo->MBB->isSuccessor(PHI->getParent())) continue; - PHI->addOperand( - MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } return; } @@ -1248,33 +1276,23 @@ SelectionDAGISel::FinishBasicBlock() { // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // This is "default" BB. We have two jumps to it. From "header" BB and // from last "case" BB. - if (PHIBB == SDB->BitTestCases[i].Default) { - PHI->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Parent)); - PHI->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(SDB->BitTestCases[i].Cases. - back().ThisBB)); - } + if (PHIBB == SDB->BitTestCases[i].Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Parent) + .addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->BitTestCases[i].Cases.back().ThisBB); // One of "cases" BB. for (unsigned j = 0, ej = SDB->BitTestCases[i].Cases.size(); j != ej; ++j) { MachineBasicBlock* cBB = SDB->BitTestCases[i].Cases[j].ThisBB; - if (cBB->isSuccessor(PHIBB)) { - PHI->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(cBB)); - } + if (cBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(cBB); } } } @@ -1309,25 +1327,17 @@ SelectionDAGISel::FinishBasicBlock() { // Update PHI Nodes for (unsigned pi = 0, pe = FuncInfo->PHINodesToUpdate.size(); pi != pe; ++pi) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[pi].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[pi].first); MachineBasicBlock *PHIBB = PHI->getParent(); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); // "default" BB. We can go there only from header BB. 
- if (PHIBB == SDB->JTCases[i].second.Default) { - PHI->addOperand - (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand - (MachineOperand::CreateMBB(SDB->JTCases[i].first.HeaderBB)); - } + if (PHIBB == SDB->JTCases[i].second.Default) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second) + .addMBB(SDB->JTCases[i].first.HeaderBB); // JT BB. Just iterate over successors here - if (FuncInfo->MBB->isSuccessor(PHIBB)) { - PHI->addOperand - (MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[pi].second, - false)); - PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); - } + if (FuncInfo->MBB->isSuccessor(PHIBB)) + PHI.addReg(FuncInfo->PHINodesToUpdate[pi].second).addMBB(FuncInfo->MBB); } } SDB->JTCases.clear(); @@ -1335,14 +1345,11 @@ SelectionDAGISel::FinishBasicBlock() { // If the switch block involved a branch to one of the actual successors, we // need to update PHI nodes in that block. for (unsigned i = 0, e = FuncInfo->PHINodesToUpdate.size(); i != e; ++i) { - MachineInstr *PHI = FuncInfo->PHINodesToUpdate[i].first; + MachineInstrBuilder PHI(*MF, FuncInfo->PHINodesToUpdate[i].first); assert(PHI->isPHI() && "This is not a machine PHI node that we are updating!"); - if (FuncInfo->MBB->isSuccessor(PHI->getParent())) { - PHI->addOperand( - MachineOperand::CreateReg(FuncInfo->PHINodesToUpdate[i].second, false)); - PHI->addOperand(MachineOperand::CreateMBB(FuncInfo->MBB)); - } + if (FuncInfo->MBB->isSuccessor(PHI->getParent())) + PHI.addReg(FuncInfo->PHINodesToUpdate[i].second).addMBB(FuncInfo->MBB); } // If we generated any switch lowering information, build and codegen any @@ -1378,18 +1385,16 @@ SelectionDAGISel::FinishBasicBlock() { // FuncInfo->MBB may have been removed from the CFG if a branch was // constant folded. if (ThisBB->isSuccessor(FuncInfo->MBB)) { - for (MachineBasicBlock::iterator Phi = FuncInfo->MBB->begin(); - Phi != FuncInfo->MBB->end() && Phi->isPHI(); - ++Phi) { + for (MachineBasicBlock::iterator + MBBI = FuncInfo->MBB->begin(), MBBE = FuncInfo->MBB->end(); + MBBI != MBBE && MBBI->isPHI(); ++MBBI) { + MachineInstrBuilder PHI(*MF, MBBI); // This value for this PHI node is recorded in PHINodesToUpdate. 
for (unsigned pn = 0; ; ++pn) { assert(pn != FuncInfo->PHINodesToUpdate.size() && "Didn't find PHI entry!"); - if (FuncInfo->PHINodesToUpdate[pn].first == Phi) { - Phi->addOperand(MachineOperand:: - CreateReg(FuncInfo->PHINodesToUpdate[pn].second, - false)); - Phi->addOperand(MachineOperand::CreateMBB(ThisBB)); + if (FuncInfo->PHINodesToUpdate[pn].first == PHI) { + PHI.addReg(FuncInfo->PHINodesToUpdate[pn].second).addMBB(ThisBB); break; } } @@ -1669,9 +1674,7 @@ SDNode *SelectionDAGISel::Select_INLINEASM(SDNode *N) { std::vector Ops(N->op_begin(), N->op_end()); SelectInlineAsmMemoryOperands(Ops); - std::vector VTs; - VTs.push_back(MVT::Other); - VTs.push_back(MVT::Glue); + EVT VTs[] = { MVT::Other, MVT::Glue }; SDValue New = CurDAG->getNode(ISD::INLINEASM, N->getDebugLoc(), VTs, &Ops[0], Ops.size()); New->setNodeId(-1); @@ -2605,11 +2608,11 @@ SelectCodeCommon(SDNode *NodeToMatch, const unsigned char *MatcherTable, SDValue Imm = RecordedNodes[RecNo].first; if (Imm->getOpcode() == ISD::Constant) { - int64_t Val = cast(Imm)->getZExtValue(); - Imm = CurDAG->getTargetConstant(Val, Imm.getValueType()); + const ConstantInt *Val=cast(Imm)->getConstantIntValue(); + Imm = CurDAG->getConstant(*Val, Imm.getValueType(), true); } else if (Imm->getOpcode() == ISD::ConstantFP) { const ConstantFP *Val=cast(Imm)->getConstantFPValue(); - Imm = CurDAG->getTargetConstantFP(*Val, Imm.getValueType()); + Imm = CurDAG->getConstantFP(*Val, Imm.getValueType(), true); } RecordedNodes.push_back(std::make_pair(Imm, RecordedNodes[RecNo].second)); diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp index 39216356522f..b752b482e3a1 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGPrinter.cpp @@ -11,21 +11,21 @@ // //===----------------------------------------------------------------------===// +#include "llvm/CodeGen/SelectionDAG.h" #include "ScheduleDAGSDNodes.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/StringExtras.h" #include "llvm/Assembly/Writer.h" -#include "llvm/CodeGen/SelectionDAG.h" #include "llvm/CodeGen/MachineConstantPool.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Constants.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/StringExtras.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; namespace llvm { diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 49f55e2fc608..f5fc66c4d3da 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -12,1037 +12,191 @@ //===----------------------------------------------------------------------===// #include "llvm/Target/TargetLowering.h" -#include "llvm/MC/MCAsmInfo.h" -#include "llvm/MC/MCExpr.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetLoweringObjectFile.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/GlobalVariable.h" -#include "llvm/DerivedTypes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" #include "llvm/CodeGen/Analysis.h" #include 
"llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/SelectionDAG.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; -/// InitLibcallNames - Set default libcall names. -/// -static void InitLibcallNames(const char **Names) { - Names[RTLIB::SHL_I16] = "__ashlhi3"; - Names[RTLIB::SHL_I32] = "__ashlsi3"; - Names[RTLIB::SHL_I64] = "__ashldi3"; - Names[RTLIB::SHL_I128] = "__ashlti3"; - Names[RTLIB::SRL_I16] = "__lshrhi3"; - Names[RTLIB::SRL_I32] = "__lshrsi3"; - Names[RTLIB::SRL_I64] = "__lshrdi3"; - Names[RTLIB::SRL_I128] = "__lshrti3"; - Names[RTLIB::SRA_I16] = "__ashrhi3"; - Names[RTLIB::SRA_I32] = "__ashrsi3"; - Names[RTLIB::SRA_I64] = "__ashrdi3"; - Names[RTLIB::SRA_I128] = "__ashrti3"; - Names[RTLIB::MUL_I8] = "__mulqi3"; - Names[RTLIB::MUL_I16] = "__mulhi3"; - Names[RTLIB::MUL_I32] = "__mulsi3"; - Names[RTLIB::MUL_I64] = "__muldi3"; - Names[RTLIB::MUL_I128] = "__multi3"; - Names[RTLIB::MULO_I32] = "__mulosi4"; - Names[RTLIB::MULO_I64] = "__mulodi4"; - Names[RTLIB::MULO_I128] = "__muloti4"; - Names[RTLIB::SDIV_I8] = "__divqi3"; - Names[RTLIB::SDIV_I16] = "__divhi3"; - Names[RTLIB::SDIV_I32] = "__divsi3"; - Names[RTLIB::SDIV_I64] = "__divdi3"; - Names[RTLIB::SDIV_I128] = "__divti3"; - Names[RTLIB::UDIV_I8] = "__udivqi3"; - Names[RTLIB::UDIV_I16] = "__udivhi3"; - Names[RTLIB::UDIV_I32] = "__udivsi3"; - Names[RTLIB::UDIV_I64] = "__udivdi3"; - Names[RTLIB::UDIV_I128] = "__udivti3"; - Names[RTLIB::SREM_I8] = "__modqi3"; - Names[RTLIB::SREM_I16] = "__modhi3"; - Names[RTLIB::SREM_I32] = "__modsi3"; - Names[RTLIB::SREM_I64] = "__moddi3"; - Names[RTLIB::SREM_I128] = "__modti3"; - Names[RTLIB::UREM_I8] = "__umodqi3"; - Names[RTLIB::UREM_I16] = "__umodhi3"; - Names[RTLIB::UREM_I32] = "__umodsi3"; - Names[RTLIB::UREM_I64] = "__umoddi3"; - Names[RTLIB::UREM_I128] = "__umodti3"; - - // These are generally not available. 
- Names[RTLIB::SDIVREM_I8] = 0; - Names[RTLIB::SDIVREM_I16] = 0; - Names[RTLIB::SDIVREM_I32] = 0; - Names[RTLIB::SDIVREM_I64] = 0; - Names[RTLIB::SDIVREM_I128] = 0; - Names[RTLIB::UDIVREM_I8] = 0; - Names[RTLIB::UDIVREM_I16] = 0; - Names[RTLIB::UDIVREM_I32] = 0; - Names[RTLIB::UDIVREM_I64] = 0; - Names[RTLIB::UDIVREM_I128] = 0; - - Names[RTLIB::NEG_I32] = "__negsi2"; - Names[RTLIB::NEG_I64] = "__negdi2"; - Names[RTLIB::ADD_F32] = "__addsf3"; - Names[RTLIB::ADD_F64] = "__adddf3"; - Names[RTLIB::ADD_F80] = "__addxf3"; - Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; - Names[RTLIB::SUB_F32] = "__subsf3"; - Names[RTLIB::SUB_F64] = "__subdf3"; - Names[RTLIB::SUB_F80] = "__subxf3"; - Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; - Names[RTLIB::MUL_F32] = "__mulsf3"; - Names[RTLIB::MUL_F64] = "__muldf3"; - Names[RTLIB::MUL_F80] = "__mulxf3"; - Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; - Names[RTLIB::DIV_F32] = "__divsf3"; - Names[RTLIB::DIV_F64] = "__divdf3"; - Names[RTLIB::DIV_F80] = "__divxf3"; - Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; - Names[RTLIB::REM_F32] = "fmodf"; - Names[RTLIB::REM_F64] = "fmod"; - Names[RTLIB::REM_F80] = "fmodl"; - Names[RTLIB::REM_PPCF128] = "fmodl"; - Names[RTLIB::FMA_F32] = "fmaf"; - Names[RTLIB::FMA_F64] = "fma"; - Names[RTLIB::FMA_F80] = "fmal"; - Names[RTLIB::FMA_PPCF128] = "fmal"; - Names[RTLIB::POWI_F32] = "__powisf2"; - Names[RTLIB::POWI_F64] = "__powidf2"; - Names[RTLIB::POWI_F80] = "__powixf2"; - Names[RTLIB::POWI_PPCF128] = "__powitf2"; - Names[RTLIB::SQRT_F32] = "sqrtf"; - Names[RTLIB::SQRT_F64] = "sqrt"; - Names[RTLIB::SQRT_F80] = "sqrtl"; - Names[RTLIB::SQRT_PPCF128] = "sqrtl"; - Names[RTLIB::LOG_F32] = "logf"; - Names[RTLIB::LOG_F64] = "log"; - Names[RTLIB::LOG_F80] = "logl"; - Names[RTLIB::LOG_PPCF128] = "logl"; - Names[RTLIB::LOG2_F32] = "log2f"; - Names[RTLIB::LOG2_F64] = "log2"; - Names[RTLIB::LOG2_F80] = "log2l"; - Names[RTLIB::LOG2_PPCF128] = "log2l"; - Names[RTLIB::LOG10_F32] = "log10f"; - Names[RTLIB::LOG10_F64] = "log10"; - Names[RTLIB::LOG10_F80] = "log10l"; - Names[RTLIB::LOG10_PPCF128] = "log10l"; - Names[RTLIB::EXP_F32] = "expf"; - Names[RTLIB::EXP_F64] = "exp"; - Names[RTLIB::EXP_F80] = "expl"; - Names[RTLIB::EXP_PPCF128] = "expl"; - Names[RTLIB::EXP2_F32] = "exp2f"; - Names[RTLIB::EXP2_F64] = "exp2"; - Names[RTLIB::EXP2_F80] = "exp2l"; - Names[RTLIB::EXP2_PPCF128] = "exp2l"; - Names[RTLIB::SIN_F32] = "sinf"; - Names[RTLIB::SIN_F64] = "sin"; - Names[RTLIB::SIN_F80] = "sinl"; - Names[RTLIB::SIN_PPCF128] = "sinl"; - Names[RTLIB::COS_F32] = "cosf"; - Names[RTLIB::COS_F64] = "cos"; - Names[RTLIB::COS_F80] = "cosl"; - Names[RTLIB::COS_PPCF128] = "cosl"; - Names[RTLIB::POW_F32] = "powf"; - Names[RTLIB::POW_F64] = "pow"; - Names[RTLIB::POW_F80] = "powl"; - Names[RTLIB::POW_PPCF128] = "powl"; - Names[RTLIB::CEIL_F32] = "ceilf"; - Names[RTLIB::CEIL_F64] = "ceil"; - Names[RTLIB::CEIL_F80] = "ceill"; - Names[RTLIB::CEIL_PPCF128] = "ceill"; - Names[RTLIB::TRUNC_F32] = "truncf"; - Names[RTLIB::TRUNC_F64] = "trunc"; - Names[RTLIB::TRUNC_F80] = "truncl"; - Names[RTLIB::TRUNC_PPCF128] = "truncl"; - Names[RTLIB::RINT_F32] = "rintf"; - Names[RTLIB::RINT_F64] = "rint"; - Names[RTLIB::RINT_F80] = "rintl"; - Names[RTLIB::RINT_PPCF128] = "rintl"; - Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; - Names[RTLIB::NEARBYINT_F64] = "nearbyint"; - Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; - Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; - Names[RTLIB::FLOOR_F32] = "floorf"; - Names[RTLIB::FLOOR_F64] = "floor"; - Names[RTLIB::FLOOR_F80] = "floorl"; - 
Names[RTLIB::FLOOR_PPCF128] = "floorl"; - Names[RTLIB::COPYSIGN_F32] = "copysignf"; - Names[RTLIB::COPYSIGN_F64] = "copysign"; - Names[RTLIB::COPYSIGN_F80] = "copysignl"; - Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; - Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; - Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; - Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; - Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; - Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; - Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; - Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; - Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; - Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; - Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; - Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; - Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; - Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; - Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; - Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; - Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; - Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; - Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; - Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; - Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; - Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; - Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; - Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; - Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; - Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; - Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; - Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; - Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; - Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; - Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; - Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; - Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; - Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; - Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; - Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; - Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; - Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; - Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; - Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; - Names[RTLIB::FPTOUINT_PPCF128_I128] = "__fixunstfti"; - Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; - Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; - Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; - Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; - Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; - Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; - Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; - Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; - Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; - Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; - Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; - Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; - Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; - Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; - Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; - Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; - Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; - Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; - Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; - Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; - Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; - Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; - Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; - Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; - Names[RTLIB::OEQ_F32] = "__eqsf2"; - 
Names[RTLIB::OEQ_F64] = "__eqdf2"; - Names[RTLIB::UNE_F32] = "__nesf2"; - Names[RTLIB::UNE_F64] = "__nedf2"; - Names[RTLIB::OGE_F32] = "__gesf2"; - Names[RTLIB::OGE_F64] = "__gedf2"; - Names[RTLIB::OLT_F32] = "__ltsf2"; - Names[RTLIB::OLT_F64] = "__ltdf2"; - Names[RTLIB::OLE_F32] = "__lesf2"; - Names[RTLIB::OLE_F64] = "__ledf2"; - Names[RTLIB::OGT_F32] = "__gtsf2"; - Names[RTLIB::OGT_F64] = "__gtdf2"; - Names[RTLIB::UO_F32] = "__unordsf2"; - Names[RTLIB::UO_F64] = "__unorddf2"; - Names[RTLIB::O_F32] = "__unordsf2"; - Names[RTLIB::O_F64] = "__unorddf2"; - Names[RTLIB::MEMCPY] = "memcpy"; - Names[RTLIB::MEMMOVE] = "memmove"; - Names[RTLIB::MEMSET] = "memset"; - Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; - Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; - Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; - Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; - Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; - Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; - Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; - Names[RTLIB::SYNC_FETCH_AND_SUB_1] = "__sync_fetch_and_sub_1"; - Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; - Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; - Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; - Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; - Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; - Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; - Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; - Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; - Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; - Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; - Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; - Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; - Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; - Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; - Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; - Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; - Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; - Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; - Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; -} - -/// InitLibcallCallingConvs - Set default libcall CallingConvs. -/// -static void InitLibcallCallingConvs(CallingConv::ID *CCs) { - for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { - CCs[i] = CallingConv::C; - } -} - -/// getFPEXT - Return the FPEXT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::f64) - return FPEXT_F32_F64; - } - - return UNKNOWN_LIBCALL; -} - -/// getFPROUND - Return the FPROUND_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. 
-RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { - if (RetVT == MVT::f32) { - if (OpVT == MVT::f64) - return FPROUND_F64_F32; - if (OpVT == MVT::f80) - return FPROUND_F80_F32; - if (OpVT == MVT::ppcf128) - return FPROUND_PPCF128_F32; - } else if (RetVT == MVT::f64) { - if (OpVT == MVT::f80) - return FPROUND_F80_F64; - if (OpVT == MVT::ppcf128) - return FPROUND_PPCF128_F64; - } - - return UNKNOWN_LIBCALL; -} - -/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOSINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F32_I16; - if (RetVT == MVT::i32) - return FPTOSINT_F32_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F32_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F32_I128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOSINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOSINT_F64_I16; - if (RetVT == MVT::i32) - return FPTOSINT_F64_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F64_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F64_I128; - } else if (OpVT == MVT::f80) { - if (RetVT == MVT::i32) - return FPTOSINT_F80_I32; - if (RetVT == MVT::i64) - return FPTOSINT_F80_I64; - if (RetVT == MVT::i128) - return FPTOSINT_F80_I128; - } else if (OpVT == MVT::ppcf128) { - if (RetVT == MVT::i32) - return FPTOSINT_PPCF128_I32; - if (RetVT == MVT::i64) - return FPTOSINT_PPCF128_I64; - if (RetVT == MVT::i128) - return FPTOSINT_PPCF128_I128; - } - return UNKNOWN_LIBCALL; -} - -/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::f32) { - if (RetVT == MVT::i8) - return FPTOUINT_F32_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F32_I16; - if (RetVT == MVT::i32) - return FPTOUINT_F32_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F32_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F32_I128; - } else if (OpVT == MVT::f64) { - if (RetVT == MVT::i8) - return FPTOUINT_F64_I8; - if (RetVT == MVT::i16) - return FPTOUINT_F64_I16; - if (RetVT == MVT::i32) - return FPTOUINT_F64_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F64_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F64_I128; - } else if (OpVT == MVT::f80) { - if (RetVT == MVT::i32) - return FPTOUINT_F80_I32; - if (RetVT == MVT::i64) - return FPTOUINT_F80_I64; - if (RetVT == MVT::i128) - return FPTOUINT_F80_I128; - } else if (OpVT == MVT::ppcf128) { - if (RetVT == MVT::i32) - return FPTOUINT_PPCF128_I32; - if (RetVT == MVT::i64) - return FPTOUINT_PPCF128_I64; - if (RetVT == MVT::i128) - return FPTOUINT_PPCF128_I128; - } - return UNKNOWN_LIBCALL; -} - -/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. 
-RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::i32) { - if (RetVT == MVT::f32) - return SINTTOFP_I32_F32; - else if (RetVT == MVT::f64) - return SINTTOFP_I32_F64; - else if (RetVT == MVT::f80) - return SINTTOFP_I32_F80; - else if (RetVT == MVT::ppcf128) - return SINTTOFP_I32_PPCF128; - } else if (OpVT == MVT::i64) { - if (RetVT == MVT::f32) - return SINTTOFP_I64_F32; - else if (RetVT == MVT::f64) - return SINTTOFP_I64_F64; - else if (RetVT == MVT::f80) - return SINTTOFP_I64_F80; - else if (RetVT == MVT::ppcf128) - return SINTTOFP_I64_PPCF128; - } else if (OpVT == MVT::i128) { - if (RetVT == MVT::f32) - return SINTTOFP_I128_F32; - else if (RetVT == MVT::f64) - return SINTTOFP_I128_F64; - else if (RetVT == MVT::f80) - return SINTTOFP_I128_F80; - else if (RetVT == MVT::ppcf128) - return SINTTOFP_I128_PPCF128; - } - return UNKNOWN_LIBCALL; -} - -/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or -/// UNKNOWN_LIBCALL if there is none. -RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { - if (OpVT == MVT::i32) { - if (RetVT == MVT::f32) - return UINTTOFP_I32_F32; - else if (RetVT == MVT::f64) - return UINTTOFP_I32_F64; - else if (RetVT == MVT::f80) - return UINTTOFP_I32_F80; - else if (RetVT == MVT::ppcf128) - return UINTTOFP_I32_PPCF128; - } else if (OpVT == MVT::i64) { - if (RetVT == MVT::f32) - return UINTTOFP_I64_F32; - else if (RetVT == MVT::f64) - return UINTTOFP_I64_F64; - else if (RetVT == MVT::f80) - return UINTTOFP_I64_F80; - else if (RetVT == MVT::ppcf128) - return UINTTOFP_I64_PPCF128; - } else if (OpVT == MVT::i128) { - if (RetVT == MVT::f32) - return UINTTOFP_I128_F32; - else if (RetVT == MVT::f64) - return UINTTOFP_I128_F64; - else if (RetVT == MVT::f80) - return UINTTOFP_I128_F80; - else if (RetVT == MVT::ppcf128) - return UINTTOFP_I128_PPCF128; - } - return UNKNOWN_LIBCALL; -} - -/// InitCmpLibcallCCs - Set default comparison libcall CC. -/// -static void InitCmpLibcallCCs(ISD::CondCode *CCs) { - memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); - CCs[RTLIB::OEQ_F32] = ISD::SETEQ; - CCs[RTLIB::OEQ_F64] = ISD::SETEQ; - CCs[RTLIB::UNE_F32] = ISD::SETNE; - CCs[RTLIB::UNE_F64] = ISD::SETNE; - CCs[RTLIB::OGE_F32] = ISD::SETGE; - CCs[RTLIB::OGE_F64] = ISD::SETGE; - CCs[RTLIB::OLT_F32] = ISD::SETLT; - CCs[RTLIB::OLT_F64] = ISD::SETLT; - CCs[RTLIB::OLE_F32] = ISD::SETLE; - CCs[RTLIB::OLE_F64] = ISD::SETLE; - CCs[RTLIB::OGT_F32] = ISD::SETGT; - CCs[RTLIB::OGT_F64] = ISD::SETGT; - CCs[RTLIB::UO_F32] = ISD::SETNE; - CCs[RTLIB::UO_F64] = ISD::SETNE; - CCs[RTLIB::O_F32] = ISD::SETEQ; - CCs[RTLIB::O_F64] = ISD::SETEQ; -} - /// NOTE: The constructor takes ownership of TLOF. TargetLowering::TargetLowering(const TargetMachine &tm, const TargetLoweringObjectFile *tlof) - : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { - // All operations default to being supported. - memset(OpActions, 0, sizeof(OpActions)); - memset(LoadExtActions, 0, sizeof(LoadExtActions)); - memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); - memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); - memset(CondCodeActions, 0, sizeof(CondCodeActions)); - - // Set default actions for various operations. - for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { - // Default all indexed load / store to expand. 
- for (unsigned IM = (unsigned)ISD::PRE_INC; - IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { - setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); - setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); - } - - // These operations default to expand. - setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); - setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); - } - - // Most targets ignore the @llvm.prefetch intrinsic. - setOperationAction(ISD::PREFETCH, MVT::Other, Expand); - - // ConstantFP nodes default to expand. Targets can either change this to - // Legal, in which case all fp constants are legal, or use isFPImmLegal() - // to optimize expansions for certain constants. - setOperationAction(ISD::ConstantFP, MVT::f16, Expand); - setOperationAction(ISD::ConstantFP, MVT::f32, Expand); - setOperationAction(ISD::ConstantFP, MVT::f64, Expand); - setOperationAction(ISD::ConstantFP, MVT::f80, Expand); - - // These library functions default to expand. - setOperationAction(ISD::FLOG , MVT::f16, Expand); - setOperationAction(ISD::FLOG2, MVT::f16, Expand); - setOperationAction(ISD::FLOG10, MVT::f16, Expand); - setOperationAction(ISD::FEXP , MVT::f16, Expand); - setOperationAction(ISD::FEXP2, MVT::f16, Expand); - setOperationAction(ISD::FFLOOR, MVT::f16, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); - setOperationAction(ISD::FCEIL, MVT::f16, Expand); - setOperationAction(ISD::FRINT, MVT::f16, Expand); - setOperationAction(ISD::FTRUNC, MVT::f16, Expand); - setOperationAction(ISD::FLOG , MVT::f32, Expand); - setOperationAction(ISD::FLOG2, MVT::f32, Expand); - setOperationAction(ISD::FLOG10, MVT::f32, Expand); - setOperationAction(ISD::FEXP , MVT::f32, Expand); - setOperationAction(ISD::FEXP2, MVT::f32, Expand); - setOperationAction(ISD::FFLOOR, MVT::f32, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); - setOperationAction(ISD::FCEIL, MVT::f32, Expand); - setOperationAction(ISD::FRINT, MVT::f32, Expand); - setOperationAction(ISD::FTRUNC, MVT::f32, Expand); - setOperationAction(ISD::FLOG , MVT::f64, Expand); - setOperationAction(ISD::FLOG2, MVT::f64, Expand); - setOperationAction(ISD::FLOG10, MVT::f64, Expand); - setOperationAction(ISD::FEXP , MVT::f64, Expand); - setOperationAction(ISD::FEXP2, MVT::f64, Expand); - setOperationAction(ISD::FFLOOR, MVT::f64, Expand); - setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); - setOperationAction(ISD::FCEIL, MVT::f64, Expand); - setOperationAction(ISD::FRINT, MVT::f64, Expand); - setOperationAction(ISD::FTRUNC, MVT::f64, Expand); - - // Default ISD::TRAP to expand (which turns it into abort). - setOperationAction(ISD::TRAP, MVT::Other, Expand); - - // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" - // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. 
- // - setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); - - IsLittleEndian = TD->isLittleEndian(); - PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); - memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); - maxStoresPerMemset = maxStoresPerMemcpy = maxStoresPerMemmove = 8; - maxStoresPerMemsetOptSize = maxStoresPerMemcpyOptSize - = maxStoresPerMemmoveOptSize = 4; - benefitFromCodePlacementOpt = false; - UseUnderscoreSetJmp = false; - UseUnderscoreLongJmp = false; - SelectIsExpensive = false; - IntDivIsCheap = false; - Pow2DivIsCheap = false; - JumpIsExpensive = false; - predictableSelectIsExpensive = false; - StackPointerRegisterToSaveRestore = 0; - ExceptionPointerRegister = 0; - ExceptionSelectorRegister = 0; - BooleanContents = UndefinedBooleanContent; - BooleanVectorContents = UndefinedBooleanContent; - SchedPreferenceInfo = Sched::ILP; - JumpBufSize = 0; - JumpBufAlignment = 0; - MinFunctionAlignment = 0; - PrefFunctionAlignment = 0; - PrefLoopAlignment = 0; - MinStackArgumentAlignment = 1; - ShouldFoldAtomicFences = false; - InsertFencesForAtomic = false; - SupportJumpTables = true; - MinimumJumpTableEntries = 4; - - InitLibcallNames(LibcallRoutineNames); - InitCmpLibcallCCs(CmpLibcallCCs); - InitLibcallCallingConvs(LibcallCallingConvs); -} + : TargetLoweringBase(tm, tlof) {} -TargetLowering::~TargetLowering() { - delete &TLOF; -} - -MVT TargetLowering::getShiftAmountTy(EVT LHSTy) const { - return MVT::getIntegerVT(8*TD->getPointerSize(0)); +const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { + return NULL; } -/// canOpTrap - Returns true if the operation can trap for the value type. -/// VT must be a legal type. -bool TargetLowering::canOpTrap(unsigned Op, EVT VT) const { - assert(isTypeLegal(VT)); - switch (Op) { - default: +/// Check whether a given call node is in tail position within its function. If +/// so, it sets Chain to the input chain of the tail call. +bool TargetLowering::isInTailCallPosition(SelectionDAG &DAG, SDNode *Node, + SDValue &Chain) const { + const Function *F = DAG.getMachineFunction().getFunction(); + + // Conservatively require the attributes of the call to match those of + // the return. Ignore noalias because it doesn't affect the call sequence. + AttributeSet CallerAttrs = F->getAttributes(); + if (AttrBuilder(CallerAttrs, AttributeSet::ReturnIndex) + .removeAttribute(Attribute::NoAlias).hasAttributes()) return false; - case ISD::FDIV: - case ISD::FREM: - case ISD::SDIV: - case ISD::UDIV: - case ISD::SREM: - case ISD::UREM: - return true; - } -} - - -static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, - unsigned &NumIntermediates, - EVT &RegisterVT, - TargetLowering *TLI) { - // Figure out the right, legal destination reg to copy into. - unsigned NumElts = VT.getVectorNumElements(); - MVT EltTy = VT.getVectorElementType(); - - unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we - // could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(NumElts)) { - NumVectorRegs = NumElts; - NumElts = 1; - } - - // Divide the input until we get to a supported size. This will always - // end with a scalar if the target doesn't support vectors. 
- while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { - NumElts >>= 1; - NumVectorRegs <<= 1; - } - - NumIntermediates = NumVectorRegs; - MVT NewVT = MVT::getVectorVT(EltTy, NumElts); - if (!TLI->isTypeLegal(NewVT)) - NewVT = EltTy; - IntermediateVT = NewVT; - - unsigned NewVTSize = NewVT.getSizeInBits(); - - // Convert sizes such as i33 to i64. - if (!isPowerOf2_32(NewVTSize)) - NewVTSize = NextPowerOf2(NewVTSize); - - EVT DestVT = TLI->getRegisterType(NewVT); - RegisterVT = DestVT; - if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. - return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + // It's not safe to eliminate the sign / zero extension of the return value. + if (CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt) || + CallerAttrs.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + return false; - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; + // Check if the only use is a function return node. + return isUsedByReturnOnly(Node, Chain); } -/// isLegalRC - Return true if the value types that can be represented by the -/// specified register class are all legal. -bool TargetLowering::isLegalRC(const TargetRegisterClass *RC) const { - for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); - I != E; ++I) { - if (isTypeLegal(*I)) - return true; - } - return false; -} -/// findRepresentativeClass - Return the largest legal super-reg register class -/// of the register class for the specified type and its associated "cost". -std::pair -TargetLowering::findRepresentativeClass(EVT VT) const { - const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); - const TargetRegisterClass *RC = RegClassForVT[VT.getSimpleVT().SimpleTy]; - if (!RC) - return std::make_pair(RC, 0); - - // Compute the set of all super-register classes. - BitVector SuperRegRC(TRI->getNumRegClasses()); - for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) - SuperRegRC.setBitsInMask(RCI.getMask()); - - // Find the first legal register class with the largest spill size. - const TargetRegisterClass *BestRC = RC; - for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { - const TargetRegisterClass *SuperRC = TRI->getRegClass(i); - // We want the largest possible spill size. - if (SuperRC->getSize() <= BestRC->getSize()) - continue; - if (!isLegalRC(SuperRC)) - continue; - BestRC = SuperRC; - } - return std::make_pair(BestRC, 1); +/// Generate a libcall taking the given operands as arguments and returning a +/// result of type RetVT. 
+SDValue TargetLowering::makeLibCall(SelectionDAG &DAG, + RTLIB::Libcall LC, EVT RetVT, + const SDValue *Ops, unsigned NumOps, + bool isSigned, DebugLoc dl) const { + TargetLowering::ArgListTy Args; + Args.reserve(NumOps); + + TargetLowering::ArgListEntry Entry; + for (unsigned i = 0; i != NumOps; ++i) { + Entry.Node = Ops[i]; + Entry.Ty = Entry.Node.getValueType().getTypeForEVT(*DAG.getContext()); + Entry.isSExt = isSigned; + Entry.isZExt = !isSigned; + Args.push_back(Entry); + } + SDValue Callee = DAG.getExternalSymbol(getLibcallName(LC), getPointerTy()); + + Type *RetTy = RetVT.getTypeForEVT(*DAG.getContext()); + TargetLowering:: + CallLoweringInfo CLI(DAG.getEntryNode(), RetTy, isSigned, !isSigned, false, + false, 0, getLibcallCallingConv(LC), + /*isTailCall=*/false, + /*doesNotReturn=*/false, /*isReturnValueUsed=*/true, + Callee, Args, DAG, dl); + std::pair CallInfo = LowerCallTo(CLI); + + return CallInfo.first; } -/// computeRegisterProperties - Once all of the register classes are added, -/// this allows us to compute derived properties we expose. -void TargetLowering::computeRegisterProperties() { - assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && - "Too many value types for ValueTypeActions to hold!"); - // Everything defaults to needing one register. - for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { - NumRegistersForVT[i] = 1; - RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; - } - // ...except isVoid, which doesn't need any registers. - NumRegistersForVT[MVT::isVoid] = 0; - - // Find the largest integer register class. - unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; - for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) - assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); - - // Every integer value type larger than this largest register takes twice as - // many registers to represent as the previous ValueType. - for (unsigned ExpandedReg = LargestIntReg + 1; ; ++ExpandedReg) { - EVT ExpandedVT = (MVT::SimpleValueType)ExpandedReg; - if (!ExpandedVT.isInteger()) +/// SoftenSetCCOperands - Soften the operands of a comparison. This code is +/// shared among BR_CC, SELECT_CC, and SETCC handlers. +void TargetLowering::softenSetCCOperands(SelectionDAG &DAG, EVT VT, + SDValue &NewLHS, SDValue &NewRHS, + ISD::CondCode &CCCode, + DebugLoc dl) const { + assert((VT == MVT::f32 || VT == MVT::f64 || VT == MVT::f128) + && "Unsupported setcc type!"); + + // Expand into one or more soft-fp libcall(s). + RTLIB::Libcall LC1 = RTLIB::UNKNOWN_LIBCALL, LC2 = RTLIB::UNKNOWN_LIBCALL; + switch (CCCode) { + case ISD::SETEQ: + case ISD::SETOEQ: + LC1 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + case ISD::SETNE: + case ISD::SETUNE: + LC1 = (VT == MVT::f32) ? RTLIB::UNE_F32 : + (VT == MVT::f64) ? RTLIB::UNE_F64 : RTLIB::UNE_F128; + break; + case ISD::SETGE: + case ISD::SETOGE: + LC1 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETLT: + case ISD::SETOLT: + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + case ISD::SETLE: + case ISD::SETOLE: + LC1 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETGT: + case ISD::SETOGT: + LC1 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; + break; + case ISD::SETUO: + LC1 = (VT == MVT::f32) ? 
RTLIB::UO_F32 : + (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + break; + case ISD::SETO: + LC1 = (VT == MVT::f32) ? RTLIB::O_F32 : + (VT == MVT::f64) ? RTLIB::O_F64 : RTLIB::O_F128; + break; + default: + LC1 = (VT == MVT::f32) ? RTLIB::UO_F32 : + (VT == MVT::f64) ? RTLIB::UO_F64 : RTLIB::UO_F128; + switch (CCCode) { + case ISD::SETONE: + // SETONE = SETOLT | SETOGT + LC1 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + // Fallthrough + case ISD::SETUGT: + LC2 = (VT == MVT::f32) ? RTLIB::OGT_F32 : + (VT == MVT::f64) ? RTLIB::OGT_F64 : RTLIB::OGT_F128; break; - NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; - RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; - TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); - ValueTypeActions.setTypeAction(ExpandedVT, TypeExpandInteger); - } - - // Inspect all of the ValueType's smaller than the largest integer - // register to see which ones need promotion. - unsigned LegalIntReg = LargestIntReg; - for (unsigned IntReg = LargestIntReg - 1; - IntReg >= (unsigned)MVT::i1; --IntReg) { - EVT IVT = (MVT::SimpleValueType)IntReg; - if (isTypeLegal(IVT)) { - LegalIntReg = IntReg; - } else { - RegisterTypeForVT[IntReg] = TransformToType[IntReg] = - (const MVT::SimpleValueType)LegalIntReg; - ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); - } - } - - // ppcf128 type is really two f64's. - if (!isTypeLegal(MVT::ppcf128)) { - NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::ppcf128] = MVT::f64; - TransformToType[MVT::ppcf128] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); - } - - // Decide how to handle f64. If the target does not have native f64 support, - // expand it to i64 and we will be generating soft float library calls. - if (!isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; - RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; - TransformToType[MVT::f64] = MVT::i64; - ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); - } - - // Decide how to handle f32. If the target does not have native support for - // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. - if (!isTypeLegal(MVT::f32)) { - if (isTypeLegal(MVT::f64)) { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; - TransformToType[MVT::f32] = MVT::f64; - ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); - } else { - NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; - RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; - TransformToType[MVT::f32] = MVT::i32; - ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); - } - } - - // Loop over all of the vector value types to see which need transformations. - for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; - i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { - MVT VT = (MVT::SimpleValueType)i; - if (isTypeLegal(VT)) continue; - - // Determine if there is a legal wider type. If so, we should promote to - // that wider vector type. - EVT EltVT = VT.getVectorElementType(); - unsigned NElts = VT.getVectorNumElements(); - if (NElts != 1) { - bool IsLegalWiderType = false; - // First try to promote the elements of integer vectors. If no legal - // promotion was found, fallback to the widen-vector method. 
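For readers skimming the diff, this is roughly what the two-libcall expansion set up by the switch above ends up computing for a predicate such as SETONE. It is only an illustrative sketch in plain C++, not LLVM code: the helper name softenedSetONE is invented, and the libcall names and return-value conventions are taken from the InitLibcallNames/InitCmpLibcallCCs tables that this same patch moves out of TargetLowering (__ltsf2 is compared SETLT against 0, __gtsf2 SETGT against 0, and the two results are merged with ISD::OR as shown a little further down).

extern "C" int __ltsf2(float a, float b);  // < 0 iff a < b and both operands are ordered (libgcc convention)
extern "C" int __gtsf2(float a, float b);  // > 0 iff a > b and both operands are ordered (libgcc convention)

static bool softenedSetONE(float a, float b) {
  // SETONE = SETOLT | SETOGT: one libcall per half, each tested against zero
  // with its comparison condition code, then OR'ed together.
  return (__ltsf2(a, b) < 0) | (__gtsf2(a, b) > 0);
}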
- for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - EVT SVT = (MVT::SimpleValueType)nVT; - // Promote vectors of integers to vectors with the same number - // of elements, with a wider element type. - if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() - && SVT.getVectorNumElements() == NElts && - isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, TypePromoteInteger); - IsLegalWiderType = true; - break; - } - } - - if (IsLegalWiderType) continue; - - // Try to widen the vector. - for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { - EVT SVT = (MVT::SimpleValueType)nVT; - if (SVT.getVectorElementType() == EltVT && - SVT.getVectorNumElements() > NElts && - isTypeLegal(SVT)) { - TransformToType[i] = SVT; - RegisterTypeForVT[i] = SVT; - NumRegistersForVT[i] = 1; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); - IsLegalWiderType = true; - break; - } - } - if (IsLegalWiderType) continue; - } - - MVT IntermediateVT; - EVT RegisterVT; - unsigned NumIntermediates; - NumRegistersForVT[i] = - getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, - RegisterVT, this); - RegisterTypeForVT[i] = RegisterVT; - - EVT NVT = VT.getPow2VectorType(); - if (NVT == VT) { - // Type is already a power of 2. The default action is to split. - TransformToType[i] = MVT::Other; - unsigned NumElts = VT.getVectorNumElements(); - ValueTypeActions.setTypeAction(VT, - NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); - } else { - TransformToType[i] = NVT; - ValueTypeActions.setTypeAction(VT, TypeWidenVector); + case ISD::SETUGE: + LC2 = (VT == MVT::f32) ? RTLIB::OGE_F32 : + (VT == MVT::f64) ? RTLIB::OGE_F64 : RTLIB::OGE_F128; + break; + case ISD::SETULT: + LC2 = (VT == MVT::f32) ? RTLIB::OLT_F32 : + (VT == MVT::f64) ? RTLIB::OLT_F64 : RTLIB::OLT_F128; + break; + case ISD::SETULE: + LC2 = (VT == MVT::f32) ? RTLIB::OLE_F32 : + (VT == MVT::f64) ? RTLIB::OLE_F64 : RTLIB::OLE_F128; + break; + case ISD::SETUEQ: + LC2 = (VT == MVT::f32) ? RTLIB::OEQ_F32 : + (VT == MVT::f64) ? RTLIB::OEQ_F64 : RTLIB::OEQ_F128; + break; + default: llvm_unreachable("Do not know how to soften this setcc!"); } } - // Determine the 'representative' register class for each value type. - // An representative register class is the largest (meaning one which is - // not a sub-register class / subreg register class) legal register class for - // a group of value types. For example, on i386, i8, i16, and i32 - // representative would be GR32; while on x86_64 it's GR64. - for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { - const TargetRegisterClass* RRC; - uint8_t Cost; - tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); - RepRegClassForVT[i] = RRC; - RepRegClassCostForVT[i] = Cost; + // Use the target specific return value for comparions lib calls. 
+ EVT RetVT = getCmpLibcallReturnType(); + SDValue Ops[2] = { NewLHS, NewRHS }; + NewLHS = makeLibCall(DAG, LC1, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewRHS = DAG.getConstant(0, RetVT); + CCCode = getCmpLibcallCC(LC1); + if (LC2 != RTLIB::UNKNOWN_LIBCALL) { + SDValue Tmp = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), + NewLHS, NewRHS, DAG.getCondCode(CCCode)); + NewLHS = makeLibCall(DAG, LC2, RetVT, Ops, 2, false/*sign irrelevant*/, dl); + NewLHS = DAG.getNode(ISD::SETCC, dl, getSetCCResultType(RetVT), NewLHS, + NewRHS, DAG.getCondCode(getCmpLibcallCC(LC2))); + NewLHS = DAG.getNode(ISD::OR, dl, Tmp.getValueType(), Tmp, NewLHS); + NewRHS = SDValue(); } } -const char *TargetLowering::getTargetNodeName(unsigned Opcode) const { - return NULL; -} - -EVT TargetLowering::getSetCCResultType(EVT VT) const { - assert(!VT.isVector() && "No default SetCC type for vectors!"); - return getPointerTy(0).SimpleTy; -} - -MVT::SimpleValueType TargetLowering::getCmpLibcallReturnType() const { - return MVT::i32; // return the default value -} - -/// getVectorTypeBreakdown - Vector types are broken down into some number of -/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 -/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. -/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. -/// -/// This method returns the number of registers needed, and the VT for each -/// register. It also returns the VT and quantity of the intermediate values -/// before they are promoted/expanded. -/// -unsigned TargetLowering::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, - EVT &IntermediateVT, - unsigned &NumIntermediates, - EVT &RegisterVT) const { - unsigned NumElts = VT.getVectorNumElements(); - - // If there is a wider vector type with the same element type as this one, - // or a promoted vector type that has the same number of elements which - // are wider, then we should convert to that legal vector type. - // This handles things like <2 x float> -> <4 x float> and - // <4 x i1> -> <4 x i32>. - LegalizeTypeAction TA = getTypeAction(Context, VT); - if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { - RegisterVT = getTypeToTransformTo(Context, VT); - if (isTypeLegal(RegisterVT)) { - IntermediateVT = RegisterVT; - NumIntermediates = 1; - return 1; - } - } - - // Figure out the right, legal destination reg to copy into. - EVT EltTy = VT.getVectorElementType(); - - unsigned NumVectorRegs = 1; - - // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we - // could break down into LHS/RHS like LegalizeDAG does. - if (!isPowerOf2_32(NumElts)) { - NumVectorRegs = NumElts; - NumElts = 1; - } - - // Divide the input until we get to a supported size. This will always - // end with a scalar if the target doesn't support vectors. - while (NumElts > 1 && !isTypeLegal( - EVT::getVectorVT(Context, EltTy, NumElts))) { - NumElts >>= 1; - NumVectorRegs <<= 1; - } - - NumIntermediates = NumVectorRegs; - - EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); - if (!isTypeLegal(NewVT)) - NewVT = EltTy; - IntermediateVT = NewVT; - - EVT DestVT = getRegisterType(Context, NewVT); - RegisterVT = DestVT; - unsigned NewVTSize = NewVT.getSizeInBits(); - - // Convert sizes such as i33 to i64. - if (!isPowerOf2_32(NewVTSize)) - NewVTSize = NextPowerOf2(NewVTSize); - - if (DestVT.bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. 
- return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); - - // Otherwise, promotion or legal types use the same number of registers as - // the vector decimated to the appropriate level. - return NumVectorRegs; -} - -/// Get the EVTs and ArgFlags collections that represent the legalized return -/// type of the given function. This does not require a DAG or a return value, -/// and is suitable for use before any DAGs for the function are constructed. -/// TODO: Move this out of TargetLowering.cpp. -void llvm::GetReturnInfo(Type* ReturnType, Attributes attr, - SmallVectorImpl &Outs, - const TargetLowering &TLI) { - SmallVector ValueVTs; - ComputeValueVTs(TLI, ReturnType, ValueVTs); - unsigned NumValues = ValueVTs.size(); - if (NumValues == 0) return; - - for (unsigned j = 0, f = NumValues; j != f; ++j) { - EVT VT = ValueVTs[j]; - ISD::NodeType ExtendKind = ISD::ANY_EXTEND; - - if (attr.hasAttribute(Attributes::SExt)) - ExtendKind = ISD::SIGN_EXTEND; - else if (attr.hasAttribute(Attributes::ZExt)) - ExtendKind = ISD::ZERO_EXTEND; - - // FIXME: C calling convention requires the return type to be promoted to - // at least 32-bit. But this is not necessary for non-C calling - // conventions. The frontend should mark functions whose return values - // require promoting with signext or zeroext attributes. - if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { - EVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); - if (VT.bitsLT(MinVT)) - VT = MinVT; - } - - unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); - EVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); - - // 'inreg' on function refers to return value - ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); - if (attr.hasAttribute(Attributes::InReg)) - Flags.setInReg(); - - // Propagate extension type if any - if (attr.hasAttribute(Attributes::SExt)) - Flags.setSExt(); - else if (attr.hasAttribute(Attributes::ZExt)) - Flags.setZExt(); - - for (unsigned i = 0; i < NumParts; ++i) - Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); - } -} - -/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate -/// function arguments in the caller parameter area. This is the actual -/// alignment, not its logarithm. -unsigned TargetLowering::getByValTypeAlignment(Type *Ty) const { - return TD->getCallFrameTypeAlignment(Ty); -} - /// getJumpTableEncoding - Return the entry encoding for a jump table in the /// current function. The returned value is a member of the /// MachineJumpTableInfo::JTEntryKind enum. @@ -1162,7 +316,8 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, // Search for the smallest integer type with free casts to and from // Op's type. For expedience, just check power-of-2 integer types. const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - unsigned SmallVTBits = BitWidth - Demanded.countLeadingZeros(); + unsigned DemandedSize = BitWidth - Demanded.countLeadingZeros(); + unsigned SmallVTBits = DemandedSize; if (!isPowerOf2_32(SmallVTBits)) SmallVTBits = NextPowerOf2(SmallVTBits); for (; SmallVTBits < BitWidth; SmallVTBits = NextPowerOf2(SmallVTBits)) { @@ -1175,7 +330,9 @@ TargetLowering::TargetLoweringOpt::ShrinkDemandedOp(SDValue Op, Op.getNode()->getOperand(0)), DAG.getNode(ISD::TRUNCATE, dl, SmallVT, Op.getNode()->getOperand(1))); - SDValue Z = DAG.getNode(ISD::ZERO_EXTEND, dl, Op.getValueType(), X); + bool NeedZext = DemandedSize > SmallVTBits; + SDValue Z = DAG.getNode(NeedZext ? 
ISD::ZERO_EXTEND : ISD::ANY_EXTEND, + dl, Op.getValueType(), X); return CombineTo(Op, Z); } } @@ -2039,7 +1196,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, APInt newMask = APInt::getLowBitsSet(maskWidth, width); for (unsigned offset=0; offsetisLittleEndian()) + if (!getDataLayout()->isLittleEndian()) bestOffset = (origWidth/width - offset - 1) * (width/8); else bestOffset = (uint64_t)offset * (width/8); @@ -2111,7 +1268,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, EVT newVT = N0.getOperand(0).getValueType(); if (DCI.isBeforeLegalizeOps() || (isOperationLegal(ISD::SETCC, newVT) && - getCondCodeAction(Cond, newVT)==Legal)) + getCondCodeAction(Cond, newVT.getSimpleVT())==Legal)) return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(C1.trunc(InSize), newVT), Cond); @@ -2207,9 +1364,10 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, Cond = (Cond == ISD::SETEQ) ? ISD::SETNE : ISD::SETEQ; return DAG.getSetCC(dl, VT, Op0.getOperand(0), Op0.getOperand(1), Cond); - } else if (Op0.getOpcode() == ISD::AND && - isa(Op0.getOperand(1)) && - cast(Op0.getOperand(1))->getAPIntValue() == 1) { + } + if (Op0.getOpcode() == ISD::AND && + isa(Op0.getOperand(1)) && + cast(Op0.getOperand(1))->getAPIntValue() == 1) { // If this is (X&1) == / != 1, normalize it to (X&1) != / == 0. if (Op0.getValueType().bitsGT(VT)) Op0 = DAG.getNode(ISD::AND, dl, VT, @@ -2224,6 +1382,11 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(0, Op0.getValueType()), Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } + if (Op0.getOpcode() == ISD::AssertZext && + cast(Op0.getOperand(1))->getVT() == MVT::i1) + return DAG.getSetCC(dl, VT, Op0, + DAG.getConstant(0, Op0.getValueType()), + Cond == ISD::SETEQ ? ISD::SETNE : ISD::SETEQ); } } @@ -2276,7 +1439,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, DAG.getConstant(MinVal, N0.getValueType()), ISD::SETEQ); // If we have setugt X, Max-1, turn it into seteq X, Max - else if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) + if ((Cond == ISD::SETGT || Cond == ISD::SETUGT) && C1 == MaxVal-1) return DAG.getSetCC(dl, VT, N0, DAG.getConstant(MaxVal, N0.getValueType()), ISD::SETEQ); @@ -2406,36 +1569,36 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // If the condition is not legal, see if we can find an equivalent one // which is legal. - if (!isCondCodeLegal(Cond, N0.getValueType())) { + if (!isCondCodeLegal(Cond, N0.getSimpleValueType())) { // If the comparison was an awkward floating-point == or != and one of // the comparison operands is infinity or negative infinity, convert the // condition to a less-awkward <= or >=. 
if (CFP->getValueAPF().isInfinity()) { if (CFP->getValueAPF().isNegative()) { if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLE); if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOLE, N0.getValueType())) + isCondCodeLegal(ISD::SETOLE, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULE); if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGT); if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETUGT, N0.getValueType())) + isCondCodeLegal(ISD::SETUGT, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGT); } else { if (Cond == ISD::SETOEQ && - isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOGE); if (Cond == ISD::SETUEQ && - isCondCodeLegal(ISD::SETOGE, N0.getValueType())) + isCondCodeLegal(ISD::SETOGE, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETUGE); if (Cond == ISD::SETUNE && - isCondCodeLegal(ISD::SETULT, N0.getValueType())) + isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETULT); if (Cond == ISD::SETONE && - isCondCodeLegal(ISD::SETULT, N0.getValueType())) + isCondCodeLegal(ISD::SETULT, N0.getSimpleValueType())) return DAG.getSetCC(dl, VT, N0, N1, ISD::SETOLT); } } @@ -2469,7 +1632,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, // if it is not already. ISD::CondCode NewCond = UOF == 0 ? ISD::SETO : ISD::SETUO; if (NewCond != Cond && (DCI.isBeforeLegalizeOps() || - getCondCodeAction(NewCond, N0.getValueType()) == Legal)) + getCondCodeAction(NewCond, N0.getSimpleValueType()) == Legal)) return DAG.getSetCC(dl, VT, N0, N1, NewCond); } @@ -2550,7 +1713,7 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (DAG.isCommutativeBinOp(N0.getOpcode())) return DAG.getSetCC(dl, VT, N0.getOperand(0), DAG.getConstant(0, N0.getValueType()), Cond); - else if (N0.getNode()->hasOneUse()) { + if (N0.getNode()->hasOneUse()) { assert(N0.getOpcode() == ISD::SUB && "Unexpected operation!"); // (Z-X) == X --> Z == X<<1 SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N1, @@ -2566,14 +1729,14 @@ TargetLowering::SimplifySetCC(EVT VT, SDValue N0, SDValue N1, if (N1.getOpcode() == ISD::ADD || N1.getOpcode() == ISD::SUB || N1.getOpcode() == ISD::XOR) { // Simplify X == (X+Z) --> Z == 0 - if (N1.getOperand(0) == N0) { + if (N1.getOperand(0) == N0) return DAG.getSetCC(dl, VT, N1.getOperand(1), DAG.getConstant(0, N1.getValueType()), Cond); - } else if (N1.getOperand(1) == N0) { - if (DAG.isCommutativeBinOp(N1.getOpcode())) { + if (N1.getOperand(1) == N0) { + if (DAG.isCommutativeBinOp(N1.getOpcode())) return DAG.getSetCC(dl, VT, N1.getOperand(0), DAG.getConstant(0, N1.getValueType()), Cond); - } else if (N1.getNode()->hasOneUse()) { + if (N1.getNode()->hasOneUse()) { assert(N1.getOpcode() == ISD::SUB && "Unexpected operation!"); // X == (Z-X) --> X<<1 == Z SDValue SH = DAG.getNode(ISD::SHL, dl, N1.getValueType(), N0, @@ -2707,7 +1870,9 @@ PerformDAGCombine(SDNode *N, DAGCombinerInfo &DCI) const { TargetLowering::ConstraintType TargetLowering::getConstraintType(const std::string &Constraint) const { - if (Constraint.size() == 1) { + unsigned S = Constraint.size(); + + if (S == 1) { switch 
(Constraint[0]) { default: break; case 'r': return C_RegisterClass; @@ -2736,9 +1901,11 @@ TargetLowering::getConstraintType(const std::string &Constraint) const { } } - if (Constraint.size() > 1 && Constraint[0] == '{' && - Constraint[Constraint.size()-1] == '}') + if (S > 1 && Constraint[0] == '{' && Constraint[S-1] == '}') { + if (S == 8 && !Constraint.compare(1, 6, "memory", 6)) // "{memory}" + return C_Memory; return C_Register; + } return C_Unknown; } @@ -2830,8 +1997,11 @@ getRegForInlineAsmConstraint(const std::string &Constraint, // Remove the braces from around the name. StringRef RegName(Constraint.data()+1, Constraint.size()-2); + std::pair R = + std::make_pair(0u, static_cast(0)); + // Figure out which register class contains this reg. - const TargetRegisterInfo *RI = TM.getRegisterInfo(); + const TargetRegisterInfo *RI = getTargetMachine().getRegisterInfo(); for (TargetRegisterInfo::regclass_iterator RCI = RI->regclass_begin(), E = RI->regclass_end(); RCI != E; ++RCI) { const TargetRegisterClass *RC = *RCI; @@ -2843,12 +2013,22 @@ getRegForInlineAsmConstraint(const std::string &Constraint, for (TargetRegisterClass::iterator I = RC->begin(), E = RC->end(); I != E; ++I) { - if (RegName.equals_lower(RI->getName(*I))) - return std::make_pair(*I, RC); + if (RegName.equals_lower(RI->getName(*I))) { + std::pair S = + std::make_pair(*I, RC); + + // If this register class has the requested value type, return it, + // otherwise keep searching and return the first class found + // if no other is found which explicitly has the requested type. + if (RC->hasType(VT)) + return S; + else if (!R.second) + R = S; + } } } - return std::make_pair(0u, static_cast(0)); + return R; } //===----------------------------------------------------------------------===// @@ -2858,7 +2038,7 @@ getRegForInlineAsmConstraint(const std::string &Constraint, /// a matching constraint like "4". bool TargetLowering::AsmOperandInfo::isMatchingInputConstraint() const { assert(!ConstraintCode.empty() && "No known constraint!"); - return isdigit(ConstraintCode[0]); + return isdigit(static_cast(ConstraintCode[0])); } /// getMatchedOperand - If this is an input matching constraint, this method @@ -2913,10 +2093,10 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( assert(!CS.getType()->isVoidTy() && "Bad inline asm!"); if (StructType *STy = dyn_cast(CS.getType())) { - OpInfo.ConstraintVT = getValueType(STy->getElementType(ResNo)); + OpInfo.ConstraintVT = getSimpleValueType(STy->getElementType(ResNo)); } else { assert(ResNo == 0 && "Asm only has one result!"); - OpInfo.ConstraintVT = getValueType(CS.getType()); + OpInfo.ConstraintVT = getSimpleValueType(CS.getType()); } ++ResNo; break; @@ -2945,7 +2125,7 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( // If OpTy is not a single value, it may be a struct/union that we // can tile with integers. 
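// Illustrative aside (not part of this patch): the getRegForInlineAsmConstraint
// change earlier in this hunk keeps scanning after the first name match,
// preferring a register class that also has the requested value type and only
// falling back to the first match otherwise.  A minimal sketch of that search
// policy; the type and names below are invented for illustration:
#include <cassert>
#include <string>
#include <utility>
#include <vector>

struct FakeRegClass { std::string Reg; bool HasRequestedVT; };

static std::pair<unsigned, const FakeRegClass *>
pickRegister(const std::vector<FakeRegClass> &Classes, const std::string &Name) {
  std::pair<unsigned, const FakeRegClass *> Fallback(0u, nullptr);
  for (unsigned i = 0; i < Classes.size(); ++i) {
    if (Classes[i].Reg != Name)
      continue;
    if (Classes[i].HasRequestedVT)
      return std::make_pair(i, &Classes[i]);     // class with the VT wins outright
    if (!Fallback.second)
      Fallback = std::make_pair(i, &Classes[i]); // otherwise remember first match
  }
  return Fallback;
}

int main() {
  std::vector<FakeRegClass> Classes = {{"r0", false}, {"r0", true}, {"r1", true}};
  assert(pickRegister(Classes, "r0").first == 1); // prefers the VT-bearing class
  assert(pickRegister(Classes, "r2").second == nullptr);
  return 0;
}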
if (!OpTy->isSingleValueType() && OpTy->isSized()) { - unsigned BitSize = TD->getTypeSizeInBits(OpTy); + unsigned BitSize = getDataLayout()->getTypeSizeInBits(OpTy); switch (BitSize) { default: break; case 1: @@ -2955,14 +2135,14 @@ TargetLowering::AsmOperandInfoVector TargetLowering::ParseConstraints( case 64: case 128: OpInfo.ConstraintVT = - EVT::getEVT(IntegerType::get(OpTy->getContext(), BitSize), true); + MVT::getVT(IntegerType::get(OpTy->getContext(), BitSize), true); break; } } else if (PointerType *PT = dyn_cast(OpTy)) { OpInfo.ConstraintVT = MVT::getIntegerVT( - 8*TD->getPointerSize(PT->getAddressSpace())); + 8*getDataLayout()->getPointerSize(PT->getAddressSpace())); } else { - OpInfo.ConstraintVT = EVT::getEVT(OpTy, true); + OpInfo.ConstraintVT = MVT::getVT(OpTy, true); } } } @@ -3255,44 +2435,6 @@ void TargetLowering::ComputeConstraintToUse(AsmOperandInfo &OpInfo, } } -//===----------------------------------------------------------------------===// -// Loop Strength Reduction hooks -//===----------------------------------------------------------------------===// - -/// isLegalAddressingMode - Return true if the addressing mode represented -/// by AM is legal for this target, for a load/store of the specified type. -bool TargetLowering::isLegalAddressingMode(const AddrMode &AM, - Type *Ty) const { - // The default implementation of this implements a conservative RISCy, r+r and - // r+i addr mode. - - // Allows a sign-extended 16-bit immediate field. - if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1) - return false; - - // No global is ever allowed as a base. - if (AM.BaseGV) - return false; - - // Only support r+r, - switch (AM.Scale) { - case 0: // "r+i" or just "i", depending on HasBaseReg. - break; - case 1: - if (AM.HasBaseReg && AM.BaseOffs) // "r+r+i" is not allowed. - return false; - // Otherwise we have r+r or r+i. - break; - case 2: - if (AM.HasBaseReg || AM.BaseOffs) // 2*r+r or 2*r+i is not allowed. - return false; - // Allow 2*r as r+r. - break; - } - - return true; -} - /// BuildExactDiv - Given an exact SDIV by a constant, create a multiplication /// with the multiplicative inverse of the constant. 
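// Illustrative aside (not part of this patch): BuildExactSDIV, whose definition
// follows, works because every odd divisor d has a multiplicative inverse
// modulo 2^N, and for an exact multiple x of d, x * d^-1 equals x / d in N-bit
// arithmetic.  A standalone demonstration that computes the inverse with
// Newton's iteration; the constants are chosen only for illustration:
#include <cassert>
#include <cstdint>

static uint32_t inverseMod2_32(uint32_t D) {
  assert((D & 1) && "only odd divisors are invertible mod 2^32");
  uint32_t X = D;            // D*D == 1 (mod 8), so X starts correct to 3 bits
  for (int i = 0; i < 4; ++i)
    X *= 2 - D * X;          // each Newton step doubles the number of good bits
  return X;
}

int main() {
  const uint32_t D = 3, Inv = inverseMod2_32(D);
  const uint32_t N = 42;     // exactly divisible by 3
  assert(N * Inv == N / D);  // the exact sdiv can be replaced by a multiply
  return 0;
}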
SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, @@ -3325,7 +2467,7 @@ SDValue TargetLowering::BuildExactSDIV(SDValue Op1, SDValue Op2, DebugLoc dl, /// SDValue TargetLowering:: BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector* Created) const { + std::vector *Created) const { EVT VT = N->getValueType(0); DebugLoc dl= N->getDebugLoc(); @@ -3385,7 +2527,7 @@ BuildSDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, /// SDValue TargetLowering:: BuildUDIV(SDNode *N, SelectionDAG &DAG, bool IsAfterLegalization, - std::vector* Created) const { + std::vector *Created) const { EVT VT = N->getValueType(0); DebugLoc dl = N->getDebugLoc(); diff --git a/lib/CodeGen/ShadowStackGC.cpp b/lib/CodeGen/ShadowStackGC.cpp index 8a6b120f97e6..10f64c709c7a 100644 --- a/lib/CodeGen/ShadowStackGC.cpp +++ b/lib/CodeGen/ShadowStackGC.cpp @@ -26,12 +26,12 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "shadowstackgc" -#include "llvm/IRBuilder.h" -#include "llvm/IntrinsicInst.h" -#include "llvm/Module.h" +#include "llvm/CodeGen/GCs.h" #include "llvm/ADT/StringExtras.h" #include "llvm/CodeGen/GCStrategy.h" -#include "llvm/CodeGen/GCs.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CallSite.h" using namespace llvm; diff --git a/lib/CodeGen/ShrinkWrapping.cpp b/lib/CodeGen/ShrinkWrapping.cpp index 4fbe1b360577..9ab491808fe5 100644 --- a/lib/CodeGen/ShrinkWrapping.cpp +++ b/lib/CodeGen/ShrinkWrapping.cpp @@ -35,22 +35,21 @@ #define DEBUG_TYPE "shrink-wrap" #include "PrologEpilogInserter.h" -#include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineLoopInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/PostOrderIterator.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SparseBitVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/MachineDominators.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineInstr.h" +#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" -#include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; diff --git a/lib/CodeGen/SjLjEHPrepare.cpp b/lib/CodeGen/SjLjEHPrepare.cpp index 4b566fcba931..3903743878b4 100644 --- a/lib/CodeGen/SjLjEHPrepare.cpp +++ b/lib/CodeGen/SjLjEHPrepare.cpp @@ -13,24 +13,24 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "sjljehprepare" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/IRBuilder.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/LLVMContext.h" -#include "llvm/Module.h" -#include "llvm/Pass.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" -#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Constants.h" +#include 
"llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" @@ -43,7 +43,7 @@ STATISTIC(NumSpilled, "Number of registers live across unwind edges"); namespace { class SjLjEHPrepare : public FunctionPass { - const TargetLowering *TLI; + const TargetLoweringBase *TLI; Type *FunctionContextTy; Constant *RegisterFn; Constant *UnregisterFn; @@ -58,7 +58,7 @@ namespace { AllocaInst *FuncCtx; public: static char ID; // Pass identification, replacement for typeid - explicit SjLjEHPrepare(const TargetLowering *tli = NULL) + explicit SjLjEHPrepare(const TargetLoweringBase *tli = NULL) : FunctionPass(ID), TLI(tli) { } bool doInitialization(Module &M); bool runOnFunction(Function &F); @@ -82,7 +82,7 @@ namespace { char SjLjEHPrepare::ID = 0; // Public Interface To the SjLjEHPrepare pass. -FunctionPass *llvm::createSjLjEHPreparePass(const TargetLowering *TLI) { +FunctionPass *llvm::createSjLjEHPreparePass(const TargetLoweringBase *TLI) { return new SjLjEHPrepare(TLI); } // doInitialization - Set up decalarations and types needed to process @@ -379,13 +379,22 @@ void SjLjEHPrepare::lowerAcrossUnwindEdges(Function &F, /// the function context and marking the call sites with the appropriate /// values. These values are used by the DWARF EH emitter. bool SjLjEHPrepare::setupEntryBlockAndCallSites(Function &F) { - SmallVector Returns; - SmallVector Invokes; + SmallVector Returns; + SmallVector Invokes; SmallSetVector LPads; // Look through the terminators of the basic blocks to find invokes. for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) if (InvokeInst *II = dyn_cast(BB->getTerminator())) { + if (Function *Callee = II->getCalledFunction()) + if (Callee->isIntrinsic() && + Callee->getIntrinsicID() == Intrinsic::donothing) { + // Remove the NOP invoke. + BranchInst::Create(II->getNormalDest(), II); + II->eraseFromParent(); + continue; + } + Invokes.push_back(II); LPads.insert(II->getUnwindDest()->getLandingPadInst()); } else if (ReturnInst *RI = dyn_cast(BB->getTerminator())) { diff --git a/lib/CodeGen/SlotIndexes.cpp b/lib/CodeGen/SlotIndexes.cpp index 95faafab45a9..20049a89d15d 100644 --- a/lib/CodeGen/SlotIndexes.cpp +++ b/lib/CodeGen/SlotIndexes.cpp @@ -142,6 +142,76 @@ void SlotIndexes::renumberIndexes(IndexList::iterator curItr) { ++NumLocalRenum; } +// Repair indexes after adding and removing instructions. +void SlotIndexes::repairIndexesInRange(MachineBasicBlock *MBB, + MachineBasicBlock::iterator Begin, + MachineBasicBlock::iterator End) { + // FIXME: Is this really necessary? The only caller repairIntervalsForRange() + // does the same thing. + // Find anchor points, which are at the beginning/end of blocks or at + // instructions that already have indexes. 
+ while (Begin != MBB->begin() && !hasIndex(Begin)) + --Begin; + while (End != MBB->end() && !hasIndex(End)) + ++End; + + bool includeStart = (Begin == MBB->begin()); + SlotIndex startIdx; + if (includeStart) + startIdx = getMBBStartIdx(MBB); + else + startIdx = getInstructionIndex(Begin); + + SlotIndex endIdx; + if (End == MBB->end()) + endIdx = getMBBEndIdx(MBB); + else + endIdx = getInstructionIndex(End); + + // FIXME: Conceptually, this code is implementing an iterator on MBB that + // optionally includes an additional position prior to MBB->begin(), indicated + // by the includeStart flag. This is done so that we can iterate MIs in a MBB + // in parallel with SlotIndexes, but there should be a better way to do this. + IndexList::iterator ListB = startIdx.listEntry(); + IndexList::iterator ListI = endIdx.listEntry(); + MachineBasicBlock::iterator MBBI = End; + bool pastStart = false; + while (ListI != ListB || MBBI != Begin || (includeStart && !pastStart)) { + assert(ListI->getIndex() >= startIdx.getIndex() && + (includeStart || !pastStart) && + "Decremented past the beginning of region to repair."); + + MachineInstr *SlotMI = ListI->getInstr(); + MachineInstr *MI = (MBBI != MBB->end() && !pastStart) ? MBBI : 0; + bool MBBIAtBegin = MBBI == Begin && (!includeStart || pastStart); + + if (SlotMI == MI && !MBBIAtBegin) { + --ListI; + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else if (MI && mi2iMap.find(MI) == mi2iMap.end()) { + if (MBBI != Begin) + --MBBI; + else + pastStart = true; + } else { + --ListI; + if (SlotMI) + removeMachineInstrFromMaps(SlotMI); + } + } + + // In theory this could be combined with the previous loop, but it is tricky + // to update the IndexList while we are iterating it. + for (MachineBasicBlock::iterator I = End; I != Begin;) { + --I; + MachineInstr *MI = I; + if (!MI->isDebugValue() && mi2iMap.find(MI) == mi2iMap.end()) + insertMachineInstrInMaps(MI); + } +} #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) void SlotIndexes::dump() const { diff --git a/lib/CodeGen/SpillPlacement.cpp b/lib/CodeGen/SpillPlacement.cpp index 320128a999ea..c5bbba3ffccc 100644 --- a/lib/CodeGen/SpillPlacement.cpp +++ b/lib/CodeGen/SpillPlacement.cpp @@ -29,6 +29,7 @@ #define DEBUG_TYPE "spillplacement" #include "SpillPlacement.h" +#include "llvm/ADT/BitVector.h" #include "llvm/CodeGen/EdgeBundles.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineBasicBlock.h" diff --git a/lib/CodeGen/Spiller.cpp b/lib/CodeGen/Spiller.cpp index 4cd22eb60f55..209792fd407b 100644 --- a/lib/CodeGen/Spiller.cpp +++ b/lib/CodeGen/Spiller.cpp @@ -10,7 +10,6 @@ #define DEBUG_TYPE "spiller" #include "Spiller.h" -#include "VirtRegMap.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" #include "llvm/CodeGen/LiveStackAnalysis.h" @@ -19,12 +18,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; diff --git a/lib/CodeGen/SplitKit.cpp b/lib/CodeGen/SplitKit.cpp index dca15ee7580f..0a3818e43ff9 100644 --- a/lib/CodeGen/SplitKit.cpp +++ b/lib/CodeGen/SplitKit.cpp @@ -14,7 +14,6 @@ 
#define DEBUG_TYPE "regalloc" #include "SplitKit.h" -#include "VirtRegMap.h" #include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveRangeEdit.h" @@ -22,6 +21,7 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineLoopInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" diff --git a/lib/CodeGen/StackColoring.cpp b/lib/CodeGen/StackColoring.cpp index 1cbee843a125..a789a2596dbf 100644 --- a/lib/CodeGen/StackColoring.cpp +++ b/lib/CodeGen/StackColoring.cpp @@ -22,39 +22,37 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stackcoloring" -#include "MachineTraceMetrics.h" -#include "llvm/Function.h" -#include "llvm/Module.h" +#include "llvm/CodeGen/Passes.h" #include "llvm/ADT/BitVector.h" -#include "llvm/Analysis/Dominators.h" -#include "llvm/Analysis/ValueTracking.h" #include "llvm/ADT/DepthFirstIterator.h" #include "llvm/ADT/PostOrderIterator.h" #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SparseSet.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/Dominators.h" +#include "llvm/Analysis/ValueTracking.h" #include "llvm/CodeGen/LiveInterval.h" -#include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineBasicBlock.h" #include "llvm/CodeGen/MachineBranchProbabilityInfo.h" #include "llvm/CodeGen/MachineDominators.h" -#include "llvm/CodeGen/MachineBasicBlock.h" +#include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/Passes.h" #include "llvm/CodeGen/SlotIndexes.h" #include "llvm/DebugInfo.h" -#include "llvm/Instructions.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; @@ -69,14 +67,14 @@ DisableColoring("no-stack-coloring", /// code. If this flag is enabled, we try to save the user. static cl::opt ProtectFromEscapedAllocas("protect-from-escaped-allocas", - cl::init(false), cl::Hidden, - cl::desc("Do not optimize lifetime zones that are broken")); + cl::init(false), cl::Hidden, + cl::desc("Do not optimize lifetime zones that " + "are broken")); STATISTIC(NumMarkerSeen, "Number of lifetime markers found."); STATISTIC(StackSpaceSaved, "Number of bytes saved due to merging slots."); STATISTIC(StackSlotMerged, "Number of stack slot merged."); -STATISTIC(EscapedAllocas, - "Number of allocas that escaped the lifetime region"); +STATISTIC(EscapedAllocas, "Number of allocas that escaped the lifetime region"); //===----------------------------------------------------------------------===// // StackColoring Pass @@ -104,12 +102,13 @@ class StackColoring : public MachineFunctionPass { }; /// Maps active slots (per bit) for each basic block. 
- DenseMap BlockLiveness; + typedef DenseMap LivenessMap; + LivenessMap BlockLiveness; /// Maps serial numbers to basic blocks. - DenseMap BasicBlocks; + DenseMap BasicBlocks; /// Maps basic blocks to a serial number. - SmallVector BasicBlockNumbering; + SmallVector BasicBlockNumbering; /// Maps liveness intervals for each slot. SmallVector Intervals; @@ -146,7 +145,7 @@ public: private: /// Debug. - void dump(); + void dump() const; /// Removes all of the lifetime marker instructions from the function. /// \returns true if any markers were removed. @@ -201,31 +200,35 @@ void StackColoring::getAnalysisUsage(AnalysisUsage &AU) const { MachineFunctionPass::getAnalysisUsage(AU); } -void StackColoring::dump() { +void StackColoring::dump() const { for (df_iterator FI = df_begin(MF), FE = df_end(MF); FI != FE; ++FI) { - unsigned Num = BasicBlocks[*FI]; - DEBUG(dbgs()<<"Inspecting block #"<getName()<<"]\n"); - Num = 0; + DEBUG(dbgs()<<"Inspecting block #"<getName()<<"]\n"); + + LivenessMap::const_iterator BI = BlockLiveness.find(*FI); + assert(BI != BlockLiveness.end() && "Block not found"); + const BlockLifetimeInfo &BlockInfo = BI->second; + DEBUG(dbgs()<<"BEGIN : {"); - for (unsigned i=0; i < BlockLiveness[*FI].Begin.size(); ++i) - DEBUG(dbgs()<begin(), BE = (*FI)->end(); BI != BE; ++BI) { @@ -256,7 +262,7 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { Markers.push_back(BI); bool IsStart = BI->getOpcode() == TargetOpcode::LIFETIME_START; - MachineOperand &MI = BI->getOperand(0); + const MachineOperand &MI = BI->getOperand(0); unsigned Slot = MI.getIndex(); MarkersFound++; @@ -268,15 +274,15 @@ unsigned StackColoring::collectMarkers(unsigned NumSlot) { } if (IsStart) { - BlockLiveness[*FI].Begin.set(Slot); + BlockInfo.Begin.set(Slot); } else { - if (BlockLiveness[*FI].Begin.test(Slot)) { + if (BlockInfo.Begin.test(Slot)) { // Allocas that start and end within a single block are handled // specially when computing the LiveIntervals to avoid pessimizing // the liveness propagation. - BlockLiveness[*FI].Begin.reset(Slot); + BlockInfo.Begin.reset(Slot); } else { - BlockLiveness[*FI].End.set(Slot); + BlockInfo.End.set(Slot); } } } @@ -293,47 +299,58 @@ void StackColoring::calculateLocalLiveness() { // formulation, and END is equivalent to GEN. The result of this computation // is a map from blocks to bitvectors where the bitvectors represent which // allocas are live in/out of that block. - SmallPtrSet BBSet(BasicBlockNumbering.begin(), - BasicBlockNumbering.end()); + SmallPtrSet BBSet(BasicBlockNumbering.begin(), + BasicBlockNumbering.end()); unsigned NumSSMIters = 0; bool changed = true; while (changed) { changed = false; ++NumSSMIters; - SmallPtrSet NextBBSet; + SmallPtrSet NextBBSet; - for (SmallVector::iterator + for (SmallVector::iterator PI = BasicBlockNumbering.begin(), PE = BasicBlockNumbering.end(); PI != PE; ++PI) { - MachineBasicBlock *BB = *PI; + const MachineBasicBlock *BB = *PI; if (!BBSet.count(BB)) continue; + // Use an iterator to avoid repeated lookups. + LivenessMap::iterator BI = BlockLiveness.find(BB); + assert(BI != BlockLiveness.end() && "Block not found"); + BlockLifetimeInfo &BlockInfo = BI->second; + BitVector LocalLiveIn; BitVector LocalLiveOut; // Forward propagation from begins to ends. 
- for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), - PE = BB->pred_end(); PI != PE; ++PI) - LocalLiveIn |= BlockLiveness[*PI].LiveOut; - LocalLiveIn |= BlockLiveness[BB].End; - LocalLiveIn.reset(BlockLiveness[BB].Begin); + for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), + PE = BB->pred_end(); PI != PE; ++PI) { + LivenessMap::const_iterator I = BlockLiveness.find(*PI); + assert(I != BlockLiveness.end() && "Predecessor not found"); + LocalLiveIn |= I->second.LiveOut; + } + LocalLiveIn |= BlockInfo.End; + LocalLiveIn.reset(BlockInfo.Begin); // Reverse propagation from ends to begins. - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), - SE = BB->succ_end(); SI != SE; ++SI) - LocalLiveOut |= BlockLiveness[*SI].LiveIn; - LocalLiveOut |= BlockLiveness[BB].Begin; - LocalLiveOut.reset(BlockLiveness[BB].End); + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), + SE = BB->succ_end(); SI != SE; ++SI) { + LivenessMap::const_iterator I = BlockLiveness.find(*SI); + assert(I != BlockLiveness.end() && "Successor not found"); + LocalLiveOut |= I->second.LiveIn; + } + LocalLiveOut |= BlockInfo.Begin; + LocalLiveOut.reset(BlockInfo.End); LocalLiveIn |= LocalLiveOut; LocalLiveOut |= LocalLiveIn; // After adopting the live bits, we need to turn-off the bits which // are de-activated in this block. - LocalLiveOut.reset(BlockLiveness[BB].End); - LocalLiveIn.reset(BlockLiveness[BB].Begin); + LocalLiveOut.reset(BlockInfo.End); + LocalLiveIn.reset(BlockInfo.Begin); // If we have both BEGIN and END markers in the same basic block then // we know that the BEGIN marker comes after the END, because we already @@ -342,25 +359,25 @@ void StackColoring::calculateLocalLiveness() { // Want to enable the LIVE_IN and LIVE_OUT of slots that have both // BEGIN and END because it means that the value lives before and after // this basic block. - BitVector LocalEndBegin = BlockLiveness[BB].End; - LocalEndBegin &= BlockLiveness[BB].Begin; + BitVector LocalEndBegin = BlockInfo.End; + LocalEndBegin &= BlockInfo.Begin; LocalLiveIn |= LocalEndBegin; LocalLiveOut |= LocalEndBegin; - if (LocalLiveIn.test(BlockLiveness[BB].LiveIn)) { + if (LocalLiveIn.test(BlockInfo.LiveIn)) { changed = true; - BlockLiveness[BB].LiveIn |= LocalLiveIn; + BlockInfo.LiveIn |= LocalLiveIn; - for (MachineBasicBlock::pred_iterator PI = BB->pred_begin(), + for (MachineBasicBlock::const_pred_iterator PI = BB->pred_begin(), PE = BB->pred_end(); PI != PE; ++PI) NextBBSet.insert(*PI); } - if (LocalLiveOut.test(BlockLiveness[BB].LiveOut)) { + if (LocalLiveOut.test(BlockInfo.LiveOut)) { changed = true; - BlockLiveness[BB].LiveOut |= LocalLiveOut; + BlockInfo.LiveOut |= LocalLiveOut; - for (MachineBasicBlock::succ_iterator SI = BB->succ_begin(), + for (MachineBasicBlock::const_succ_iterator SI = BB->succ_begin(), SE = BB->succ_end(); SI != SE; ++SI) NextBBSet.insert(*SI); } @@ -384,9 +401,9 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { Finishes.resize(NumSlots); // Create the interval for the basic blocks with lifetime markers in them. 
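// Illustrative aside (not part of this patch): the liveness propagation
// rewritten above is a plain GEN/KILL dataflow step on bit vectors, with a
// slot's LIFETIME_END markers acting as GEN and its LIFETIME_START markers as
// KILL.  A tiny standalone instance of the forward transfer function, with
// made-up slot numbers:
#include <bitset>
#include <cassert>

int main() {
  std::bitset<2> LiveOutOfPreds("10");    // slot 1 is live out of a predecessor
  std::bitset<2> End("01"), Begin("10");  // GEN = End markers, KILL = Begin markers
  std::bitset<2> LiveIn = (LiveOutOfPreds | End) & ~Begin;
  assert(LiveIn == std::bitset<2>("01")); // slot 0 becomes live-in, slot 1 is killed
  return 0;
}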
- for (SmallVector::iterator it = Markers.begin(), + for (SmallVectorImpl::const_iterator it = Markers.begin(), e = Markers.end(); it != e; ++it) { - MachineInstr *MI = *it; + const MachineInstr *MI = *it; if (MI->getParent() != MBB) continue; @@ -395,7 +412,7 @@ void StackColoring::calculateLiveIntervals(unsigned NumSlots) { "Invalid Lifetime marker"); bool IsStart = MI->getOpcode() == TargetOpcode::LIFETIME_START; - MachineOperand &Mo = MI->getOperand(0); + const MachineOperand &Mo = MI->getOperand(0); int Slot = Mo.getIndex(); assert(Slot >= 0 && "Invalid slot"); @@ -482,7 +499,7 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { // Keep a list of *allocas* which need to be remapped. DenseMap Allocas; - for (DenseMap::iterator it = SlotRemap.begin(), + for (DenseMap::const_iterator it = SlotRemap.begin(), e = SlotRemap.end(); it != e; ++it) { const AllocaInst *From = MFI->getObjectAllocation(it->first); const AllocaInst *To = MFI->getObjectAllocation(it->second); @@ -560,7 +577,7 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { SlotIndex Index = Indexes->getInstructionIndex(I); LiveInterval *Interval = Intervals[FromSlot]; assert(Interval->find(Index) != Interval->end() && - "Found instruction usage outside of live range."); + "Found instruction usage outside of live range."); } #endif @@ -577,8 +594,8 @@ void StackColoring::remapInstructions(DenseMap &SlotRemap) { } void StackColoring::removeInvalidSlotRanges() { - MachineFunction::iterator BB, BBE; - MachineBasicBlock::iterator I, IE; + MachineFunction::const_iterator BB, BBE; + MachineBasicBlock::const_iterator I, IE; for (BB = MF->begin(), BBE = MF->end(); BB != BBE; ++BB) for (I = BB->begin(), IE = BB->end(); I != IE; ++I) { @@ -597,7 +614,7 @@ void StackColoring::removeInvalidSlotRanges() { // Check all of the machine operands. for (unsigned i = 0 ; i < I->getNumOperands(); ++i) { - MachineOperand &MO = I->getOperand(i); + const MachineOperand &MO = I->getOperand(i); if (!MO.isFI()) continue; @@ -720,11 +737,13 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // and continue. // Sort the slots according to their size. Place unused slots at the end. - std::sort(SortedSlots.begin(), SortedSlots.end(), SlotSizeSorter(MFI)); + // Use stable sort to guarantee deterministic code generation. + std::stable_sort(SortedSlots.begin(), SortedSlots.end(), + SlotSizeSorter(MFI)); - bool Chanded = true; - while (Chanded) { - Chanded = false; + bool Changed = true; + while (Changed) { + Changed = false; for (unsigned I = 0; I < NumSlots; ++I) { if (SortedSlots[I] == -1) continue; @@ -741,7 +760,7 @@ bool StackColoring::runOnMachineFunction(MachineFunction &Func) { // Merge disjoint slots. 
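// Illustrative aside (not part of this patch): the std::stable_sort switch in
// this hunk matters because several stack slots often have equal size; a
// stable sort keeps their original relative order, so the slot-merging loop
// below visits them deterministically on every host.  Standalone demonstration
// with invented slot numbers and sizes:
#include <algorithm>
#include <cassert>
#include <utility>
#include <vector>

int main() {
  std::vector<std::pair<int, int>> Slots = {{0, 8}, {1, 4}, {2, 4}, {3, 4}};
  std::stable_sort(Slots.begin(), Slots.end(),
                   [](const std::pair<int, int> &A, const std::pair<int, int> &B) {
                     return A.second > B.second;    // larger slots first
                   });
  // Slots 1, 2 and 3 tie on size and keep their original relative order.
  assert(Slots[1].first == 1 && Slots[2].first == 2 && Slots[3].first == 3);
  return 0;
}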
if (!First->overlaps(*Second)) { - Chanded = true; + Changed = true; First->MergeRangesInAsValue(*Second, First->getValNumInfo(0)); SlotRemap[SecondSlot] = FirstSlot; SortedSlots[J] = -1; diff --git a/lib/CodeGen/StackProtector.cpp b/lib/CodeGen/StackProtector.cpp index 31e9ec0ac0b9..fbef34772b08 100644 --- a/lib/CodeGen/StackProtector.cpp +++ b/lib/CodeGen/StackProtector.cpp @@ -16,33 +16,44 @@ #define DEBUG_TYPE "stack-protector" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/Triple.h" #include "llvm/Analysis/Dominators.h" -#include "llvm/Attributes.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/Instructions.h" -#include "llvm/Intrinsics.h" -#include "llvm/Module.h" +#include "llvm/IR/Attributes.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Intrinsics.h" +#include "llvm/IR/Module.h" #include "llvm/Pass.h" #include "llvm/Support/CommandLine.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetOptions.h" -#include "llvm/ADT/Triple.h" using namespace llvm; +STATISTIC(NumFunProtected, "Number of functions protected"); +STATISTIC(NumAddrTaken, "Number of local variables that have their address" + " taken."); + namespace { class StackProtector : public FunctionPass { /// TLI - Keep a pointer of a TargetLowering to consult for determining /// target type sizes. - const TargetLowering *TLI; + const TargetLoweringBase *TLI; Function *F; Module *M; DominatorTree *DT; + /// VisitedPHIs - The set of PHI nodes visited when determining + /// if a variable's reference has been taken. This set + /// is maintained to ensure we don't visit the same PHI node multiple + /// times. + SmallPtrSet VisitedPHIs; + /// InsertStackProtectors - Insert code into the prologue and epilogue of /// the function. /// @@ -58,17 +69,21 @@ namespace { /// ContainsProtectableArray - Check whether the type either is an array or /// contains an array of sufficient size so that we need stack protectors /// for it. - bool ContainsProtectableArray(Type *Ty, bool InStruct = false) const; + bool ContainsProtectableArray(Type *Ty, bool Strong = false, + bool InStruct = false) const; + + /// \brief Check whether a stack allocation has its address taken. + bool HasAddressTaken(const Instruction *AI); /// RequiresStackProtector - Check whether or not this function needs a /// stack protector based upon the stack protector level. - bool RequiresStackProtector() const; + bool RequiresStackProtector(); public: static char ID; // Pass identification, replacement for typeid. 
StackProtector() : FunctionPass(ID), TLI(0) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } - StackProtector(const TargetLowering *tli) + StackProtector(const TargetLoweringBase *tli) : FunctionPass(ID), TLI(tli) { initializeStackProtectorPass(*PassRegistry::getPassRegistry()); } @@ -85,7 +100,7 @@ char StackProtector::ID = 0; INITIALIZE_PASS(StackProtector, "stack-protector", "Insert stack protectors", false, false) -FunctionPass *llvm::createStackProtectorPass(const TargetLowering *tli) { +FunctionPass *llvm::createStackProtectorPass(const TargetLoweringBase *tli) { return new StackProtector(tli); } @@ -96,15 +111,21 @@ bool StackProtector::runOnFunction(Function &Fn) { if (!RequiresStackProtector()) return false; + ++NumFunProtected; return InsertStackProtectors(); } /// ContainsProtectableArray - Check whether the type either is an array or /// contains a char array of sufficient size so that we need stack protectors /// for it. -bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { +bool StackProtector::ContainsProtectableArray(Type *Ty, bool Strong, + bool InStruct) const { if (!Ty) return false; if (ArrayType *AT = dyn_cast(Ty)) { + // In strong mode any array, regardless of type and size, triggers a + // protector + if (Strong) + return true; const TargetMachine &TM = TLI->getTargetMachine(); if (!AT->getElementType()->isIntegerTy(8)) { Triple Trip(TM.getTargetTriple()); @@ -126,37 +147,103 @@ bool StackProtector::ContainsProtectableArray(Type *Ty, bool InStruct) const { for (StructType::element_iterator I = ST->element_begin(), E = ST->element_end(); I != E; ++I) - if (ContainsProtectableArray(*I, true)) + if (ContainsProtectableArray(*I, Strong, true)) return true; return false; } -/// RequiresStackProtector - Check whether or not this function needs a stack -/// protector based upon the stack protector level. The heuristic we use is to -/// add a guard variable to functions that call alloca, and functions with -/// buffers larger than SSPBufferSize bytes. -bool StackProtector::RequiresStackProtector() const { - if (F->getFnAttributes().hasAttribute(Attributes::StackProtectReq)) - return true; +bool StackProtector::HasAddressTaken(const Instruction *AI) { + for (Value::const_use_iterator UI = AI->use_begin(), UE = AI->use_end(); + UI != UE; ++UI) { + const User *U = *UI; + if (const StoreInst *SI = dyn_cast(U)) { + if (AI == SI->getValueOperand()) + return true; + } else if (const PtrToIntInst *SI = dyn_cast(U)) { + if (AI == SI->getOperand(0)) + return true; + } else if (isa(U)) { + return true; + } else if (isa(U)) { + return true; + } else if (const SelectInst *SI = dyn_cast(U)) { + if (HasAddressTaken(SI)) + return true; + } else if (const PHINode *PN = dyn_cast(U)) { + // Keep track of what PHI nodes we have already visited to ensure + // they are only visited once. + if (VisitedPHIs.insert(PN)) + if (HasAddressTaken(PN)) + return true; + } else if (const GetElementPtrInst *GEP = dyn_cast(U)) { + if (HasAddressTaken(GEP)) + return true; + } else if (const BitCastInst *BI = dyn_cast(U)) { + if (HasAddressTaken(BI)) + return true; + } + } + return false; +} - if (!F->getFnAttributes().hasAttribute(Attributes::StackProtect)) +/// \brief Check whether or not this function needs a stack protector based +/// upon the stack protector level. +/// +/// We use two heuristics: a standard (ssp) and strong (sspstrong). 
+/// The standard heuristic which will add a guard variable to functions that +/// call alloca with a either a variable size or a size >= SSPBufferSize, +/// functions with character buffers larger than SSPBufferSize, and functions +/// with aggregates containing character buffers larger than SSPBufferSize. The +/// strong heuristic will add a guard variables to functions that call alloca +/// regardless of size, functions with any buffer regardless of type and size, +/// functions with aggregates that contain any buffer regardless of type and +/// size, and functions that contain stack-based variables that have had their +/// address taken. +bool StackProtector::RequiresStackProtector() { + bool Strong = false; + if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectReq)) + return true; + else if (F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtectStrong)) + Strong = true; + else if (!F->getAttributes().hasAttribute(AttributeSet::FunctionIndex, + Attribute::StackProtect)) return false; for (Function::iterator I = F->begin(), E = F->end(); I != E; ++I) { BasicBlock *BB = I; for (BasicBlock::iterator - II = BB->begin(), IE = BB->end(); II != IE; ++II) + II = BB->begin(), IE = BB->end(); II != IE; ++II) { if (AllocaInst *AI = dyn_cast(II)) { - if (AI->isArrayAllocation()) - // This is a call to alloca with a variable size. Emit stack - // protectors. + if (AI->isArrayAllocation()) { + // SSP-Strong: Enable protectors for any call to alloca, regardless + // of size. + if (Strong) + return true; + + if (const ConstantInt *CI = + dyn_cast(AI->getArraySize())) { + unsigned BufferSize = TLI->getTargetMachine().Options.SSPBufferSize; + if (CI->getLimitedValue(BufferSize) >= BufferSize) + // A call to alloca with size >= SSPBufferSize requires + // stack protectors. + return true; + } else // A call to alloca with a variable size requires protectors. 
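// Illustrative aside (not part of this patch): the new size test above uses
// APInt::getLimitedValue(BufferSize), which clamps the alloca element count to
// BufferSize, so "clamped value >= BufferSize" asks "is the count at least
// BufferSize?" no matter how wide the constant is.  A stand-in sketch; the
// helper below is a simplification, not the real APInt API, and 8 merely
// stands in for the target's SSPBufferSize option:
#include <cassert>
#include <cstdint>

static uint64_t clampTo(uint64_t V, uint64_t Limit) {
  return V > Limit ? Limit : V;   // mirrors getLimitedValue's clamping behaviour
}

int main() {
  const uint64_t BufferSize = 8;
  assert(clampTo(4, BufferSize) <  BufferSize);                 // small alloca: no guard
  assert(clampTo(8, BufferSize) >= BufferSize);                 // at the threshold: guard
  assert(clampTo(uint64_t(1) << 40, BufferSize) >= BufferSize); // huge alloca: guard
  return 0;
}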
+ return true; + } + + if (ContainsProtectableArray(AI->getAllocatedType(), Strong)) return true; - if (ContainsProtectableArray(AI->getAllocatedType())) + if (Strong && HasAddressTaken(AI)) { + ++NumAddrTaken; return true; + } } + } } return false; diff --git a/lib/CodeGen/StackSlotColoring.cpp b/lib/CodeGen/StackSlotColoring.cpp index d349abc35774..f9515610d7e9 100644 --- a/lib/CodeGen/StackSlotColoring.cpp +++ b/lib/CodeGen/StackSlotColoring.cpp @@ -12,8 +12,11 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "stackslotcoloring" -#include "llvm/Module.h" #include "llvm/CodeGen/Passes.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -22,14 +25,11 @@ #include "llvm/CodeGen/MachineMemOperand.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/IR/Module.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" #include using namespace llvm; diff --git a/lib/CodeGen/StrongPHIElimination.cpp b/lib/CodeGen/StrongPHIElimination.cpp index 39fd600d4abf..b337c5393343 100644 --- a/lib/CodeGen/StrongPHIElimination.cpp +++ b/lib/CodeGen/StrongPHIElimination.cpp @@ -39,17 +39,17 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "strongphielim" -#include "PHIEliminationUtils.h" #include "llvm/CodeGen/Passes.h" +#include "PHIEliminationUtils.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/DenseSet.h" -#include "llvm/ADT/Statistic.h" #include "llvm/Support/Debug.h" +#include "llvm/Target/TargetInstrInfo.h" using namespace llvm; namespace { diff --git a/lib/CodeGen/TailDuplication.cpp b/lib/CodeGen/TailDuplication.cpp index 1497d1ba6287..1ec88172a0b0 100644 --- a/lib/CodeGen/TailDuplication.cpp +++ b/lib/CodeGen/TailDuplication.cpp @@ -13,25 +13,25 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "tailduplication" -#include "llvm/Function.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/CodeGen/MachineModuleInfo.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/ADT/OwningPtr.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/MachineSSAUpdater.h" #include "llvm/CodeGen/RegisterScavenging.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/IR/Function.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/DenseSet.h" 
-#include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/SetVector.h" -#include "llvm/ADT/Statistic.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; STATISTIC(NumTails , "Number of tails duplicated"); @@ -461,6 +461,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, II != EE; ++II) { if (!II->isPHI()) break; + MachineInstrBuilder MIB(*FromBB->getParent(), II); unsigned Idx = 0; for (unsigned i = 1, e = II->getNumOperands(); i != e; i += 2) { MachineOperand &MO = II->getOperand(i+1); @@ -508,8 +509,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, II->getOperand(Idx+1).setMBB(SrcBB); Idx = 0; } else { - II->addOperand(MachineOperand::CreateReg(SrcReg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + MIB.addReg(SrcReg).addMBB(SrcBB); } } } else { @@ -521,8 +521,7 @@ TailDuplicatePass::UpdateSuccessorsPHIs(MachineBasicBlock *FromBB, bool isDead, II->getOperand(Idx+1).setMBB(SrcBB); Idx = 0; } else { - II->addOperand(MachineOperand::CreateReg(Reg, false)); - II->addOperand(MachineOperand::CreateMBB(SrcBB)); + MIB.addReg(Reg).addMBB(SrcBB); } } } @@ -552,8 +551,8 @@ TailDuplicatePass::shouldTailDuplicate(const MachineFunction &MF, // compensate for the duplication. unsigned MaxDuplicateCount; if (TailDuplicateSize.getNumOccurrences() == 0 && - MF.getFunction()->getFnAttributes(). - hasAttribute(Attributes::OptimizeForSize)) + MF.getFunction()->getAttributes(). + hasAttribute(AttributeSet::FunctionIndex, Attribute::OptimizeForSize)) MaxDuplicateCount = 1; else MaxDuplicateCount = TailDuplicateSize; diff --git a/lib/CodeGen/TargetFrameLoweringImpl.cpp b/lib/CodeGen/TargetFrameLoweringImpl.cpp index cadb87815dbe..883e9d1846d9 100644 --- a/lib/CodeGen/TargetFrameLoweringImpl.cpp +++ b/lib/CodeGen/TargetFrameLoweringImpl.cpp @@ -11,12 +11,11 @@ // //===----------------------------------------------------------------------===// +#include "llvm/Target/TargetFrameLowering.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineFunction.h" -#include "llvm/Target/TargetFrameLowering.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" - #include using namespace llvm; diff --git a/lib/CodeGen/TargetInstrInfo.cpp b/lib/CodeGen/TargetInstrInfo.cpp new file mode 100644 index 000000000000..20eb91879317 --- /dev/null +++ b/lib/CodeGen/TargetInstrInfo.cpp @@ -0,0 +1,739 @@ +//===-- TargetInstrInfo.cpp - Target Instruction Information --------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetInstrInfo class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineMemOperand.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/PseudoSourceValue.h" +#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCInstrItineraries.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetLowering.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +using namespace llvm; + +static cl::opt DisableHazardRecognizer( + "disable-sched-hazard", cl::Hidden, cl::init(false), + cl::desc("Disable hazard detection during preRA scheduling")); + +TargetInstrInfo::~TargetInstrInfo() { +} + +const TargetRegisterClass* +TargetInstrInfo::getRegClass(const MCInstrDesc &MCID, unsigned OpNum, + const TargetRegisterInfo *TRI, + const MachineFunction &MF) const { + if (OpNum >= MCID.getNumOperands()) + return 0; + + short RegClass = MCID.OpInfo[OpNum].RegClass; + if (MCID.OpInfo[OpNum].isLookupPtrRegClass()) + return TRI->getPointerRegClass(MF, RegClass); + + // Instructions like INSERT_SUBREG do not have fixed register classes. + if (RegClass < 0) + return 0; + + // Otherwise just look it up normally. + return TRI->getRegClass(RegClass); +} + +/// insertNoop - Insert a noop into the instruction stream at the specified +/// point. +void TargetInstrInfo::insertNoop(MachineBasicBlock &MBB, + MachineBasicBlock::iterator MI) const { + llvm_unreachable("Target didn't implement insertNoop!"); +} + +/// Measure the specified inline asm to determine an approximation of its +/// length. +/// Comments (which run till the next SeparatorString or newline) do not +/// count as an instruction. +/// Any other non-whitespace text is considered an instruction, with +/// multiple instructions separated by SeparatorString or newlines. +/// Variable-length instructions are not handled here; this function +/// may be overloaded in the target code to do that. +unsigned TargetInstrInfo::getInlineAsmLength(const char *Str, + const MCAsmInfo &MAI) const { + + + // Count the number of instructions in the asm. + bool atInsnStart = true; + unsigned Length = 0; + for (; *Str; ++Str) { + if (*Str == '\n' || strncmp(Str, MAI.getSeparatorString(), + strlen(MAI.getSeparatorString())) == 0) + atInsnStart = true; + if (atInsnStart && !std::isspace(static_cast(*Str))) { + Length += MAI.getMaxInstLength(); + atInsnStart = false; + } + if (atInsnStart && strncmp(Str, MAI.getCommentString(), + strlen(MAI.getCommentString())) == 0) + atInsnStart = false; + } + + return Length; +} + +/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything +/// after it, replacing it with an unconditional branch to NewDest. +void +TargetInstrInfo::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, + MachineBasicBlock *NewDest) const { + MachineBasicBlock *MBB = Tail->getParent(); + + // Remove all the old successors of MBB from the CFG. + while (!MBB->succ_empty()) + MBB->removeSuccessor(MBB->succ_begin()); + + // Remove all the dead instructions from the end of MBB. + MBB->erase(Tail, MBB->end()); + + // If MBB isn't immediately before MBB, insert a branch to it. 
+ if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) + InsertBranch(*MBB, NewDest, 0, SmallVector(), + Tail->getDebugLoc()); + MBB->addSuccessor(NewDest); +} + +// commuteInstruction - The default implementation of this method just exchanges +// the two operands returned by findCommutedOpIndices. +MachineInstr *TargetInstrInfo::commuteInstruction(MachineInstr *MI, + bool NewMI) const { + const MCInstrDesc &MCID = MI->getDesc(); + bool HasDef = MCID.getNumDefs(); + if (HasDef && !MI->getOperand(0).isReg()) + // No idea how to commute this instruction. Target should implement its own. + return 0; + unsigned Idx1, Idx2; + if (!findCommutedOpIndices(MI, Idx1, Idx2)) { + std::string msg; + raw_string_ostream Msg(msg); + Msg << "Don't know how to commute: " << *MI; + report_fatal_error(Msg.str()); + } + + assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && + "This only knows how to commute register operands so far"); + unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; + unsigned Reg1 = MI->getOperand(Idx1).getReg(); + unsigned Reg2 = MI->getOperand(Idx2).getReg(); + unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0; + unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); + unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); + bool Reg1IsKill = MI->getOperand(Idx1).isKill(); + bool Reg2IsKill = MI->getOperand(Idx2).isKill(); + // If destination is tied to either of the commuted source register, then + // it must be updated. + if (HasDef && Reg0 == Reg1 && + MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { + Reg2IsKill = false; + Reg0 = Reg2; + SubReg0 = SubReg2; + } else if (HasDef && Reg0 == Reg2 && + MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { + Reg1IsKill = false; + Reg0 = Reg1; + SubReg0 = SubReg1; + } + + if (NewMI) { + // Create a new instruction. + MachineFunction &MF = *MI->getParent()->getParent(); + MI = MF.CloneMachineInstr(MI); + } + + if (HasDef) { + MI->getOperand(0).setReg(Reg0); + MI->getOperand(0).setSubReg(SubReg0); + } + MI->getOperand(Idx2).setReg(Reg1); + MI->getOperand(Idx1).setReg(Reg2); + MI->getOperand(Idx2).setSubReg(SubReg1); + MI->getOperand(Idx1).setSubReg(SubReg2); + MI->getOperand(Idx2).setIsKill(Reg1IsKill); + MI->getOperand(Idx1).setIsKill(Reg2IsKill); + return MI; +} + +/// findCommutedOpIndices - If specified MI is commutable, return the two +/// operand indices that would swap value. Return true if the instruction +/// is not in a form which this routine understands. +bool TargetInstrInfo::findCommutedOpIndices(MachineInstr *MI, + unsigned &SrcOpIdx1, + unsigned &SrcOpIdx2) const { + assert(!MI->isBundle() && + "TargetInstrInfo::findCommutedOpIndices() can't handle bundles"); + + const MCInstrDesc &MCID = MI->getDesc(); + if (!MCID.isCommutable()) + return false; + // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this + // is not true, then the target must implement this. + SrcOpIdx1 = MCID.getNumDefs(); + SrcOpIdx2 = SrcOpIdx1 + 1; + if (!MI->getOperand(SrcOpIdx1).isReg() || + !MI->getOperand(SrcOpIdx2).isReg()) + // No idea. + return false; + return true; +} + + +bool +TargetInstrInfo::isUnpredicatedTerminator(const MachineInstr *MI) const { + if (!MI->isTerminator()) return false; + + // Conditional branch is a special case. 
+ if (MI->isBranch() && !MI->isBarrier()) + return true; + if (!MI->isPredicable()) + return true; + return !isPredicated(MI); +} + + +bool TargetInstrInfo::PredicateInstruction(MachineInstr *MI, + const SmallVectorImpl &Pred) const { + bool MadeChange = false; + + assert(!MI->isBundle() && + "TargetInstrInfo::PredicateInstruction() can't handle bundles"); + + const MCInstrDesc &MCID = MI->getDesc(); + if (!MI->isPredicable()) + return false; + + for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { + if (MCID.OpInfo[i].isPredicate()) { + MachineOperand &MO = MI->getOperand(i); + if (MO.isReg()) { + MO.setReg(Pred[j].getReg()); + MadeChange = true; + } else if (MO.isImm()) { + MO.setImm(Pred[j].getImm()); + MadeChange = true; + } else if (MO.isMBB()) { + MO.setMBB(Pred[j].getMBB()); + MadeChange = true; + } + ++j; + } + } + return MadeChange; +} + +bool TargetInstrInfo::hasLoadFromStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isLoad() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + MMO = *o; + return true; + } + } + return false; +} + +bool TargetInstrInfo::hasStoreToStackSlot(const MachineInstr *MI, + const MachineMemOperand *&MMO, + int &FrameIndex) const { + for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), + oe = MI->memoperands_end(); + o != oe; + ++o) { + if ((*o)->isStore() && (*o)->getValue()) + if (const FixedStackPseudoSourceValue *Value = + dyn_cast((*o)->getValue())) { + FrameIndex = Value->getFrameIndex(); + MMO = *o; + return true; + } + } + return false; +} + +void TargetInstrInfo::reMaterialize(MachineBasicBlock &MBB, + MachineBasicBlock::iterator I, + unsigned DestReg, + unsigned SubIdx, + const MachineInstr *Orig, + const TargetRegisterInfo &TRI) const { + MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); + MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); + MBB.insert(I, MI); +} + +bool +TargetInstrInfo::produceSameValue(const MachineInstr *MI0, + const MachineInstr *MI1, + const MachineRegisterInfo *MRI) const { + return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); +} + +MachineInstr *TargetInstrInfo::duplicate(MachineInstr *Orig, + MachineFunction &MF) const { + assert(!Orig->isNotDuplicable() && + "Instruction cannot be duplicated"); + return MF.CloneMachineInstr(Orig); +} + +// If the COPY instruction in MI can be folded to a stack operation, return +// the register class to use. +static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, + unsigned FoldIdx) { + assert(MI->isCopy() && "MI must be a COPY instruction"); + if (MI->getNumOperands() != 2) + return 0; + assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); + + const MachineOperand &FoldOp = MI->getOperand(FoldIdx); + const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); + + if (FoldOp.getSubReg() || LiveOp.getSubReg()) + return 0; + + unsigned FoldReg = FoldOp.getReg(); + unsigned LiveReg = LiveOp.getReg(); + + assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && + "Cannot fold physregs"); + + const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); + const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); + + if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) + return RC->contains(LiveOp.getReg()) ? 
RC : 0; + + if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) + return RC; + + // FIXME: Allow folding when register classes are memory compatible. + return 0; +} + +bool TargetInstrInfo:: +canFoldMemoryOperand(const MachineInstr *MI, + const SmallVectorImpl &Ops) const { + return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); +} + +/// foldMemoryOperand - Attempt to fold a load or store of the specified stack +/// slot into the specified machine instruction for the specified operand(s). +/// If this is possible, a new instruction is returned with the specified +/// operand folded, otherwise NULL is returned. The client is responsible for +/// removing the old instruction and adding the new one in the instruction +/// stream. +MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops, + int FI) const { + unsigned Flags = 0; + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + if (MI->getOperand(Ops[i]).isDef()) + Flags |= MachineMemOperand::MOStore; + else + Flags |= MachineMemOperand::MOLoad; + + MachineBasicBlock *MBB = MI->getParent(); + assert(MBB && "foldMemoryOperand needs an inserted instruction"); + MachineFunction &MF = *MBB->getParent(); + + // Ask the target to do the actual folding. + if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { + // Add a memory operand, foldMemoryOperandImpl doesn't do that. + assert((!(Flags & MachineMemOperand::MOStore) || + NewMI->mayStore()) && + "Folded a def to a non-store!"); + assert((!(Flags & MachineMemOperand::MOLoad) || + NewMI->mayLoad()) && + "Folded a use to a non-load!"); + const MachineFrameInfo &MFI = *MF.getFrameInfo(); + assert(MFI.getObjectOffset(FI) != -1); + MachineMemOperand *MMO = + MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), + Flags, MFI.getObjectSize(FI), + MFI.getObjectAlignment(FI)); + NewMI->addMemOperand(MF, MMO); + + // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. + return MBB->insert(MI, NewMI); + } + + // Straight COPY may fold as load/store. + if (!MI->isCopy() || Ops.size() != 1) + return 0; + + const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); + if (!RC) + return 0; + + const MachineOperand &MO = MI->getOperand(1-Ops[0]); + MachineBasicBlock::iterator Pos = MI; + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + + if (Flags == MachineMemOperand::MOStore) + storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); + else + loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); + return --Pos; +} + +/// foldMemoryOperand - Same as the previous version except it allows folding +/// of any load and store from / to any address, not just from a specific +/// stack slot. +MachineInstr* +TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, + const SmallVectorImpl &Ops, + MachineInstr* LoadMI) const { + assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); +#ifndef NDEBUG + for (unsigned i = 0, e = Ops.size(); i != e; ++i) + assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); +#endif + MachineBasicBlock &MBB = *MI->getParent(); + MachineFunction &MF = *MBB.getParent(); + + // Ask the target to do the actual folding. + MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); + if (!NewMI) return 0; + + NewMI = MBB.insert(MI, NewMI); + + // Copy the memoperands from the load to the folded instruction. 
+ NewMI->setMemRefs(LoadMI->memoperands_begin(), + LoadMI->memoperands_end()); + + return NewMI; +} + +bool TargetInstrInfo:: +isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, + AliasAnalysis *AA) const { + const MachineFunction &MF = *MI->getParent()->getParent(); + const MachineRegisterInfo &MRI = MF.getRegInfo(); + const TargetMachine &TM = MF.getTarget(); + const TargetInstrInfo &TII = *TM.getInstrInfo(); + + // Remat clients assume operand 0 is the defined register. + if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) + return false; + unsigned DefReg = MI->getOperand(0).getReg(); + + // A sub-register definition can only be rematerialized if the instruction + // doesn't read the other parts of the register. Otherwise it is really a + // read-modify-write operation on the full virtual register which cannot be + // moved safely. + if (TargetRegisterInfo::isVirtualRegister(DefReg) && + MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg)) + return false; + + // A load from a fixed stack slot can be rematerialized. This may be + // redundant with subsequent checks, but it's target-independent, + // simple, and a common case. + int FrameIdx = 0; + if (TII.isLoadFromStackSlot(MI, FrameIdx) && + MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) + return true; + + // Avoid instructions obviously unsafe for remat. + if (MI->isNotDuplicable() || MI->mayStore() || + MI->hasUnmodeledSideEffects()) + return false; + + // Don't remat inline asm. We have no idea how expensive it is + // even if it's side effect free. + if (MI->isInlineAsm()) + return false; + + // Avoid instructions which load from potentially varying memory. + if (MI->mayLoad() && !MI->isInvariantLoad(AA)) + return false; + + // If any of the registers accessed are non-constant, conservatively assume + // the instruction is not rematerializable. + for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { + const MachineOperand &MO = MI->getOperand(i); + if (!MO.isReg()) continue; + unsigned Reg = MO.getReg(); + if (Reg == 0) + continue; + + // Check for a well-behaved physical register. + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + if (MO.isUse()) { + // If the physreg has no defs anywhere, it's just an ambient register + // and we can freely move its uses. Alternatively, if it's allocatable, + // it could get allocated to something with a def during allocation. + if (!MRI.isConstantPhysReg(Reg, MF)) + return false; + } else { + // A physreg def. We can't remat it. + return false; + } + continue; + } + + // Only allow one virtual-register def. There may be multiple defs of the + // same virtual register, though. + if (MO.isDef() && Reg != DefReg) + return false; + + // Don't allow any virtual-register uses. Rematting an instruction with + // virtual register uses would length the live ranges of the uses, which + // is not necessarily a good idea, certainly not "trivial". + if (MO.isUse()) + return false; + } + + // Everything checked out. + return true; +} + +/// isSchedulingBoundary - Test if the given instruction should be +/// considered a scheduling boundary. This primarily includes labels +/// and terminators. +bool TargetInstrInfo::isSchedulingBoundary(const MachineInstr *MI, + const MachineBasicBlock *MBB, + const MachineFunction &MF) const { + // Terminators and labels can't be scheduled around. 
+ if (MI->isTerminator() || MI->isLabel()) + return true; + + // Don't attempt to schedule around any instruction that defines + // a stack-oriented pointer, as it's unlikely to be profitable. This + // saves compile time, because it doesn't require every single + // stack slot reference to depend on the instruction that does the + // modification. + const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); + const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); + if (MI->modifiesRegister(TLI.getStackPointerRegisterToSaveRestore(), TRI)) + return true; + + return false; +} + +// Provide a global flag for disabling the PreRA hazard recognizer that targets +// may choose to honor. +bool TargetInstrInfo::usePreRAHazardRecognizer() const { + return !DisableHazardRecognizer; +} + +// Default implementation of CreateTargetRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfo:: +CreateTargetHazardRecognizer(const TargetMachine *TM, + const ScheduleDAG *DAG) const { + // Dummy hazard recognizer allows all instructions to issue. + return new ScheduleHazardRecognizer(); +} + +// Default implementation of CreateTargetMIHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfo:: +CreateTargetMIHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "misched"); +} + +// Default implementation of CreateTargetPostRAHazardRecognizer. +ScheduleHazardRecognizer *TargetInstrInfo:: +CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, + const ScheduleDAG *DAG) const { + return (ScheduleHazardRecognizer *) + new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); +} + +//===----------------------------------------------------------------------===// +// SelectionDAG latency interface. +//===----------------------------------------------------------------------===// + +int +TargetInstrInfo::getOperandLatency(const InstrItineraryData *ItinData, + SDNode *DefNode, unsigned DefIdx, + SDNode *UseNode, unsigned UseIdx) const { + if (!ItinData || ItinData->isEmpty()) + return -1; + + if (!DefNode->isMachineOpcode()) + return -1; + + unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); + if (!UseNode->isMachineOpcode()) + return ItinData->getOperandCycle(DefClass, DefIdx); + unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +int TargetInstrInfo::getInstrLatency(const InstrItineraryData *ItinData, + SDNode *N) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + if (!N->isMachineOpcode()) + return 1; + + return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); +} + +//===----------------------------------------------------------------------===// +// MachineInstr latency interface. +//===----------------------------------------------------------------------===// + +unsigned +TargetInstrInfo::getNumMicroOps(const InstrItineraryData *ItinData, + const MachineInstr *MI) const { + if (!ItinData || ItinData->isEmpty()) + return 1; + + unsigned Class = MI->getDesc().getSchedClass(); + int UOps = ItinData->Itineraries[Class].NumMicroOps; + if (UOps >= 0) + return UOps; + + // The # of u-ops is dynamically determined. The specific target should + // override this function to return the right number. + return 1; +} + +/// Return the default expected latency for a def based on it's opcode. 
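// The default-latency rule defined next is simple enough to state on its own.
// A minimal standalone sketch, using hypothetical stand-in types rather than
// the real MachineInstr / MCSchedModel classes:

struct SketchSchedModel { unsigned LoadLatency; unsigned HighLatency; };
struct SketchInstr { bool IsTransient; bool MayLoad; bool IsHighLatencyDef; };

static unsigned sketchDefaultDefLatency(const SketchSchedModel &SM,
                                        const SketchInstr &MI) {
  if (MI.IsTransient)      return 0;              // COPY-like, no real work
  if (MI.MayLoad)          return SM.LoadLatency; // loads pay the memory latency
  if (MI.IsHighLatencyDef) return SM.HighLatency; // e.g. division-like opcodes
  return 1;                                       // everything else: one cycle
}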
+unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, + const MachineInstr *DefMI) const { + if (DefMI->isTransient()) + return 0; + if (DefMI->mayLoad()) + return SchedModel->LoadLatency; + if (isHighLatencyDef(DefMI->getOpcode())) + return SchedModel->HighLatency; + return 1; +} + +unsigned TargetInstrInfo:: +getInstrLatency(const InstrItineraryData *ItinData, + const MachineInstr *MI, + unsigned *PredCost) const { + // Default to one cycle for no itinerary. However, an "empty" itinerary may + // still have a MinLatency property, which getStageLatency checks. + if (!ItinData) + return MI->mayLoad() ? 2 : 1; + + return ItinData->getStageLatency(MI->getDesc().getSchedClass()); +} + +bool TargetInstrInfo::hasLowDefLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, + unsigned DefIdx) const { + if (!ItinData || ItinData->isEmpty()) + return false; + + unsigned DefClass = DefMI->getDesc().getSchedClass(); + int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); + return (DefCycle != -1 && DefCycle <= 1); +} + +/// Both DefMI and UseMI must be valid. By default, call directly to the +/// itinerary. This may be overriden by the target. +int TargetInstrInfo:: +getOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx) const { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + unsigned UseClass = UseMI->getDesc().getSchedClass(); + return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); +} + +/// If we can determine the operand latency from the def only, without itinerary +/// lookup, do so. Otherwise return -1. +int TargetInstrInfo::computeDefOperandLatency( + const InstrItineraryData *ItinData, + const MachineInstr *DefMI, bool FindMin) const { + + // Let the target hook getInstrLatency handle missing itineraries. + if (!ItinData) + return getInstrLatency(ItinData, DefMI); + + // Return a latency based on the itinerary properties and defining instruction + // if possible. Some common subtargets don't require per-operand latency, + // especially for minimum latencies. + if (FindMin) { + // If MinLatency is valid, call getInstrLatency. This uses Stage latency if + // it exists before defaulting to MinLatency. + if (ItinData->SchedModel->MinLatency >= 0) + return getInstrLatency(ItinData, DefMI); + + // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. + // For empty itineraries, short-cirtuit the check and default to one cycle. + if (ItinData->isEmpty()) + return 1; + } + else if(ItinData->isEmpty()) + return defaultDefLatency(ItinData->SchedModel, DefMI); + + // ...operand lookup required + return -1; +} + +/// computeOperandLatency - Compute and return the latency of the given data +/// dependent def and use when the operand indices are already known. UseMI may +/// be NULL for an unknown use. +/// +/// FindMin may be set to get the minimum vs. expected latency. Minimum +/// latency is used for scheduling groups, while expected latency is for +/// instruction cost and critical path. +/// +/// Depending on the subtarget's itinerary properties, this may or may not need +/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or +/// UseIdx to compute min latency. 
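// The lookup order implemented below (def-only latency first, then the
// per-operand itinerary entry, then the whole-instruction latency, clamped by
// the default def latency when expected rather than minimum latency is
// requested) can be sketched with plain integers. Hypothetical helper; -1
// stands for "no answer at this level":

static int sketchOperandLatency(int DefOnlyLatency, int OperandItinLatency,
                                int InstrLatency, int DefaultDefLatency,
                                bool FindMin) {
  if (DefOnlyLatency >= 0)
    return DefOnlyLatency;      // resolved without looking at the use operand
  if (OperandItinLatency >= 0)
    return OperandItinLatency;  // a per-operand itinerary entry was found
  int Latency = InstrLatency;   // fall back to the stage latency
  if (!FindMin && DefaultDefLatency > Latency)
    Latency = DefaultDefLatency;
  return Latency;
}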
+unsigned TargetInstrInfo:: +computeOperandLatency(const InstrItineraryData *ItinData, + const MachineInstr *DefMI, unsigned DefIdx, + const MachineInstr *UseMI, unsigned UseIdx, + bool FindMin) const { + + int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); + if (DefLatency >= 0) + return DefLatency; + + assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); + + int OperLatency = 0; + if (UseMI) + OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); + else { + unsigned DefClass = DefMI->getDesc().getSchedClass(); + OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); + } + if (OperLatency >= 0) + return OperLatency; + + // No operand latency was found. + unsigned InstrLatency = getInstrLatency(ItinData, DefMI); + + // Expected latency is the max of the stage latency and itinerary props. + if (!FindMin) + InstrLatency = std::max(InstrLatency, + defaultDefLatency(ItinData->SchedModel, DefMI)); + return InstrLatency; +} diff --git a/lib/CodeGen/TargetInstrInfoImpl.cpp b/lib/CodeGen/TargetInstrInfoImpl.cpp deleted file mode 100644 index 4439192fe2f4..000000000000 --- a/lib/CodeGen/TargetInstrInfoImpl.cpp +++ /dev/null @@ -1,681 +0,0 @@ -//===-- TargetInstrInfoImpl.cpp - Target Instruction Information ----------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements the TargetInstrInfoImpl class, it just provides default -// implementations of various methods. -// -//===----------------------------------------------------------------------===// - -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetLowering.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/CodeGen/MachineFrameInfo.h" -#include "llvm/CodeGen/MachineInstr.h" -#include "llvm/CodeGen/MachineInstrBuilder.h" -#include "llvm/CodeGen/MachineMemOperand.h" -#include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/CodeGen/ScoreboardHazardRecognizer.h" -#include "llvm/CodeGen/PseudoSourceValue.h" -#include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/Debug.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" -using namespace llvm; - -static cl::opt DisableHazardRecognizer( - "disable-sched-hazard", cl::Hidden, cl::init(false), - cl::desc("Disable hazard detection during preRA scheduling")); - -/// ReplaceTailWithBranchTo - Delete the instruction OldInst and everything -/// after it, replacing it with an unconditional branch to NewDest. -void -TargetInstrInfoImpl::ReplaceTailWithBranchTo(MachineBasicBlock::iterator Tail, - MachineBasicBlock *NewDest) const { - MachineBasicBlock *MBB = Tail->getParent(); - - // Remove all the old successors of MBB from the CFG. - while (!MBB->succ_empty()) - MBB->removeSuccessor(MBB->succ_begin()); - - // Remove all the dead instructions from the end of MBB. - MBB->erase(Tail, MBB->end()); - - // If MBB isn't immediately before MBB, insert a branch to it. 
- if (++MachineFunction::iterator(MBB) != MachineFunction::iterator(NewDest)) - InsertBranch(*MBB, NewDest, 0, SmallVector(), - Tail->getDebugLoc()); - MBB->addSuccessor(NewDest); -} - -// commuteInstruction - The default implementation of this method just exchanges -// the two operands returned by findCommutedOpIndices. -MachineInstr *TargetInstrInfoImpl::commuteInstruction(MachineInstr *MI, - bool NewMI) const { - const MCInstrDesc &MCID = MI->getDesc(); - bool HasDef = MCID.getNumDefs(); - if (HasDef && !MI->getOperand(0).isReg()) - // No idea how to commute this instruction. Target should implement its own. - return 0; - unsigned Idx1, Idx2; - if (!findCommutedOpIndices(MI, Idx1, Idx2)) { - std::string msg; - raw_string_ostream Msg(msg); - Msg << "Don't know how to commute: " << *MI; - report_fatal_error(Msg.str()); - } - - assert(MI->getOperand(Idx1).isReg() && MI->getOperand(Idx2).isReg() && - "This only knows how to commute register operands so far"); - unsigned Reg0 = HasDef ? MI->getOperand(0).getReg() : 0; - unsigned Reg1 = MI->getOperand(Idx1).getReg(); - unsigned Reg2 = MI->getOperand(Idx2).getReg(); - unsigned SubReg0 = HasDef ? MI->getOperand(0).getSubReg() : 0; - unsigned SubReg1 = MI->getOperand(Idx1).getSubReg(); - unsigned SubReg2 = MI->getOperand(Idx2).getSubReg(); - bool Reg1IsKill = MI->getOperand(Idx1).isKill(); - bool Reg2IsKill = MI->getOperand(Idx2).isKill(); - // If destination is tied to either of the commuted source register, then - // it must be updated. - if (HasDef && Reg0 == Reg1 && - MI->getDesc().getOperandConstraint(Idx1, MCOI::TIED_TO) == 0) { - Reg2IsKill = false; - Reg0 = Reg2; - SubReg0 = SubReg2; - } else if (HasDef && Reg0 == Reg2 && - MI->getDesc().getOperandConstraint(Idx2, MCOI::TIED_TO) == 0) { - Reg1IsKill = false; - Reg0 = Reg1; - SubReg0 = SubReg1; - } - - if (NewMI) { - // Create a new instruction. - MachineFunction &MF = *MI->getParent()->getParent(); - MI = MF.CloneMachineInstr(MI); - } - - if (HasDef) { - MI->getOperand(0).setReg(Reg0); - MI->getOperand(0).setSubReg(SubReg0); - } - MI->getOperand(Idx2).setReg(Reg1); - MI->getOperand(Idx1).setReg(Reg2); - MI->getOperand(Idx2).setSubReg(SubReg1); - MI->getOperand(Idx1).setSubReg(SubReg2); - MI->getOperand(Idx2).setIsKill(Reg1IsKill); - MI->getOperand(Idx1).setIsKill(Reg2IsKill); - return MI; -} - -/// findCommutedOpIndices - If specified MI is commutable, return the two -/// operand indices that would swap value. Return true if the instruction -/// is not in a form which this routine understands. -bool TargetInstrInfoImpl::findCommutedOpIndices(MachineInstr *MI, - unsigned &SrcOpIdx1, - unsigned &SrcOpIdx2) const { - assert(!MI->isBundle() && - "TargetInstrInfoImpl::findCommutedOpIndices() can't handle bundles"); - - const MCInstrDesc &MCID = MI->getDesc(); - if (!MCID.isCommutable()) - return false; - // This assumes v0 = op v1, v2 and commuting would swap v1 and v2. If this - // is not true, then the target must implement this. - SrcOpIdx1 = MCID.getNumDefs(); - SrcOpIdx2 = SrcOpIdx1 + 1; - if (!MI->getOperand(SrcOpIdx1).isReg() || - !MI->getOperand(SrcOpIdx2).isReg()) - // No idea. - return false; - return true; -} - - -bool -TargetInstrInfoImpl::isUnpredicatedTerminator(const MachineInstr *MI) const { - if (!MI->isTerminator()) return false; - - // Conditional branch is a special case. 
- if (MI->isBranch() && !MI->isBarrier()) - return true; - if (!MI->isPredicable()) - return true; - return !isPredicated(MI); -} - - -bool TargetInstrInfoImpl::PredicateInstruction(MachineInstr *MI, - const SmallVectorImpl &Pred) const { - bool MadeChange = false; - - assert(!MI->isBundle() && - "TargetInstrInfoImpl::PredicateInstruction() can't handle bundles"); - - const MCInstrDesc &MCID = MI->getDesc(); - if (!MI->isPredicable()) - return false; - - for (unsigned j = 0, i = 0, e = MI->getNumOperands(); i != e; ++i) { - if (MCID.OpInfo[i].isPredicate()) { - MachineOperand &MO = MI->getOperand(i); - if (MO.isReg()) { - MO.setReg(Pred[j].getReg()); - MadeChange = true; - } else if (MO.isImm()) { - MO.setImm(Pred[j].getImm()); - MadeChange = true; - } else if (MO.isMBB()) { - MO.setMBB(Pred[j].getMBB()); - MadeChange = true; - } - ++j; - } - } - return MadeChange; -} - -bool TargetInstrInfoImpl::hasLoadFromStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); - o != oe; - ++o) { - if ((*o)->isLoad() && (*o)->getValue()) - if (const FixedStackPseudoSourceValue *Value = - dyn_cast((*o)->getValue())) { - FrameIndex = Value->getFrameIndex(); - MMO = *o; - return true; - } - } - return false; -} - -bool TargetInstrInfoImpl::hasStoreToStackSlot(const MachineInstr *MI, - const MachineMemOperand *&MMO, - int &FrameIndex) const { - for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), - oe = MI->memoperands_end(); - o != oe; - ++o) { - if ((*o)->isStore() && (*o)->getValue()) - if (const FixedStackPseudoSourceValue *Value = - dyn_cast((*o)->getValue())) { - FrameIndex = Value->getFrameIndex(); - MMO = *o; - return true; - } - } - return false; -} - -void TargetInstrInfoImpl::reMaterialize(MachineBasicBlock &MBB, - MachineBasicBlock::iterator I, - unsigned DestReg, - unsigned SubIdx, - const MachineInstr *Orig, - const TargetRegisterInfo &TRI) const { - MachineInstr *MI = MBB.getParent()->CloneMachineInstr(Orig); - MI->substituteRegister(MI->getOperand(0).getReg(), DestReg, SubIdx, TRI); - MBB.insert(I, MI); -} - -bool -TargetInstrInfoImpl::produceSameValue(const MachineInstr *MI0, - const MachineInstr *MI1, - const MachineRegisterInfo *MRI) const { - return MI0->isIdenticalTo(MI1, MachineInstr::IgnoreVRegDefs); -} - -MachineInstr *TargetInstrInfoImpl::duplicate(MachineInstr *Orig, - MachineFunction &MF) const { - assert(!Orig->isNotDuplicable() && - "Instruction cannot be duplicated"); - return MF.CloneMachineInstr(Orig); -} - -// If the COPY instruction in MI can be folded to a stack operation, return -// the register class to use. 
-static const TargetRegisterClass *canFoldCopy(const MachineInstr *MI, - unsigned FoldIdx) { - assert(MI->isCopy() && "MI must be a COPY instruction"); - if (MI->getNumOperands() != 2) - return 0; - assert(FoldIdx<2 && "FoldIdx refers no nonexistent operand"); - - const MachineOperand &FoldOp = MI->getOperand(FoldIdx); - const MachineOperand &LiveOp = MI->getOperand(1-FoldIdx); - - if (FoldOp.getSubReg() || LiveOp.getSubReg()) - return 0; - - unsigned FoldReg = FoldOp.getReg(); - unsigned LiveReg = LiveOp.getReg(); - - assert(TargetRegisterInfo::isVirtualRegister(FoldReg) && - "Cannot fold physregs"); - - const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); - const TargetRegisterClass *RC = MRI.getRegClass(FoldReg); - - if (TargetRegisterInfo::isPhysicalRegister(LiveOp.getReg())) - return RC->contains(LiveOp.getReg()) ? RC : 0; - - if (RC->hasSubClassEq(MRI.getRegClass(LiveReg))) - return RC; - - // FIXME: Allow folding when register classes are memory compatible. - return 0; -} - -bool TargetInstrInfoImpl:: -canFoldMemoryOperand(const MachineInstr *MI, - const SmallVectorImpl &Ops) const { - return MI->isCopy() && Ops.size() == 1 && canFoldCopy(MI, Ops[0]); -} - -/// foldMemoryOperand - Attempt to fold a load or store of the specified stack -/// slot into the specified machine instruction for the specified operand(s). -/// If this is possible, a new instruction is returned with the specified -/// operand folded, otherwise NULL is returned. The client is responsible for -/// removing the old instruction and adding the new one in the instruction -/// stream. -MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl &Ops, - int FI) const { - unsigned Flags = 0; - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - if (MI->getOperand(Ops[i]).isDef()) - Flags |= MachineMemOperand::MOStore; - else - Flags |= MachineMemOperand::MOLoad; - - MachineBasicBlock *MBB = MI->getParent(); - assert(MBB && "foldMemoryOperand needs an inserted instruction"); - MachineFunction &MF = *MBB->getParent(); - - // Ask the target to do the actual folding. - if (MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, FI)) { - // Add a memory operand, foldMemoryOperandImpl doesn't do that. - assert((!(Flags & MachineMemOperand::MOStore) || - NewMI->mayStore()) && - "Folded a def to a non-store!"); - assert((!(Flags & MachineMemOperand::MOLoad) || - NewMI->mayLoad()) && - "Folded a use to a non-load!"); - const MachineFrameInfo &MFI = *MF.getFrameInfo(); - assert(MFI.getObjectOffset(FI) != -1); - MachineMemOperand *MMO = - MF.getMachineMemOperand(MachinePointerInfo::getFixedStack(FI), - Flags, MFI.getObjectSize(FI), - MFI.getObjectAlignment(FI)); - NewMI->addMemOperand(MF, MMO); - - // FIXME: change foldMemoryOperandImpl semantics to also insert NewMI. - return MBB->insert(MI, NewMI); - } - - // Straight COPY may fold as load/store. 
- if (!MI->isCopy() || Ops.size() != 1) - return 0; - - const TargetRegisterClass *RC = canFoldCopy(MI, Ops[0]); - if (!RC) - return 0; - - const MachineOperand &MO = MI->getOperand(1-Ops[0]); - MachineBasicBlock::iterator Pos = MI; - const TargetRegisterInfo *TRI = MF.getTarget().getRegisterInfo(); - - if (Flags == MachineMemOperand::MOStore) - storeRegToStackSlot(*MBB, Pos, MO.getReg(), MO.isKill(), FI, RC, TRI); - else - loadRegFromStackSlot(*MBB, Pos, MO.getReg(), FI, RC, TRI); - return --Pos; -} - -/// foldMemoryOperand - Same as the previous version except it allows folding -/// of any load and store from / to any address, not just from a specific -/// stack slot. -MachineInstr* -TargetInstrInfo::foldMemoryOperand(MachineBasicBlock::iterator MI, - const SmallVectorImpl &Ops, - MachineInstr* LoadMI) const { - assert(LoadMI->canFoldAsLoad() && "LoadMI isn't foldable!"); -#ifndef NDEBUG - for (unsigned i = 0, e = Ops.size(); i != e; ++i) - assert(MI->getOperand(Ops[i]).isUse() && "Folding load into def!"); -#endif - MachineBasicBlock &MBB = *MI->getParent(); - MachineFunction &MF = *MBB.getParent(); - - // Ask the target to do the actual folding. - MachineInstr *NewMI = foldMemoryOperandImpl(MF, MI, Ops, LoadMI); - if (!NewMI) return 0; - - NewMI = MBB.insert(MI, NewMI); - - // Copy the memoperands from the load to the folded instruction. - NewMI->setMemRefs(LoadMI->memoperands_begin(), - LoadMI->memoperands_end()); - - return NewMI; -} - -bool TargetInstrInfo:: -isReallyTriviallyReMaterializableGeneric(const MachineInstr *MI, - AliasAnalysis *AA) const { - const MachineFunction &MF = *MI->getParent()->getParent(); - const MachineRegisterInfo &MRI = MF.getRegInfo(); - const TargetMachine &TM = MF.getTarget(); - const TargetInstrInfo &TII = *TM.getInstrInfo(); - - // Remat clients assume operand 0 is the defined register. - if (!MI->getNumOperands() || !MI->getOperand(0).isReg()) - return false; - unsigned DefReg = MI->getOperand(0).getReg(); - - // A sub-register definition can only be rematerialized if the instruction - // doesn't read the other parts of the register. Otherwise it is really a - // read-modify-write operation on the full virtual register which cannot be - // moved safely. - if (TargetRegisterInfo::isVirtualRegister(DefReg) && - MI->getOperand(0).getSubReg() && MI->readsVirtualRegister(DefReg)) - return false; - - // A load from a fixed stack slot can be rematerialized. This may be - // redundant with subsequent checks, but it's target-independent, - // simple, and a common case. - int FrameIdx = 0; - if (TII.isLoadFromStackSlot(MI, FrameIdx) && - MF.getFrameInfo()->isImmutableObjectIndex(FrameIdx)) - return true; - - // Avoid instructions obviously unsafe for remat. - if (MI->isNotDuplicable() || MI->mayStore() || - MI->hasUnmodeledSideEffects()) - return false; - - // Don't remat inline asm. We have no idea how expensive it is - // even if it's side effect free. - if (MI->isInlineAsm()) - return false; - - // Avoid instructions which load from potentially varying memory. - if (MI->mayLoad() && !MI->isInvariantLoad(AA)) - return false; - - // If any of the registers accessed are non-constant, conservatively assume - // the instruction is not rematerializable. - for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { - const MachineOperand &MO = MI->getOperand(i); - if (!MO.isReg()) continue; - unsigned Reg = MO.getReg(); - if (Reg == 0) - continue; - - // Check for a well-behaved physical register. 
- if (TargetRegisterInfo::isPhysicalRegister(Reg)) { - if (MO.isUse()) { - // If the physreg has no defs anywhere, it's just an ambient register - // and we can freely move its uses. Alternatively, if it's allocatable, - // it could get allocated to something with a def during allocation. - if (!MRI.isConstantPhysReg(Reg, MF)) - return false; - } else { - // A physreg def. We can't remat it. - return false; - } - continue; - } - - // Only allow one virtual-register def. There may be multiple defs of the - // same virtual register, though. - if (MO.isDef() && Reg != DefReg) - return false; - - // Don't allow any virtual-register uses. Rematting an instruction with - // virtual register uses would length the live ranges of the uses, which - // is not necessarily a good idea, certainly not "trivial". - if (MO.isUse()) - return false; - } - - // Everything checked out. - return true; -} - -/// isSchedulingBoundary - Test if the given instruction should be -/// considered a scheduling boundary. This primarily includes labels -/// and terminators. -bool TargetInstrInfoImpl::isSchedulingBoundary(const MachineInstr *MI, - const MachineBasicBlock *MBB, - const MachineFunction &MF) const{ - // Terminators and labels can't be scheduled around. - if (MI->isTerminator() || MI->isLabel()) - return true; - - // Don't attempt to schedule around any instruction that defines - // a stack-oriented pointer, as it's unlikely to be profitable. This - // saves compile time, because it doesn't require every single - // stack slot reference to depend on the instruction that does the - // modification. - const TargetLowering &TLI = *MF.getTarget().getTargetLowering(); - if (MI->definesRegister(TLI.getStackPointerRegisterToSaveRestore())) - return true; - - return false; -} - -// Provide a global flag for disabling the PreRA hazard recognizer that targets -// may choose to honor. -bool TargetInstrInfoImpl::usePreRAHazardRecognizer() const { - return !DisableHazardRecognizer; -} - -// Default implementation of CreateTargetRAHazardRecognizer. -ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetHazardRecognizer(const TargetMachine *TM, - const ScheduleDAG *DAG) const { - // Dummy hazard recognizer allows all instructions to issue. - return new ScheduleHazardRecognizer(); -} - -// Default implementation of CreateTargetMIHazardRecognizer. -ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetMIHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *DAG) const { - return (ScheduleHazardRecognizer *) - new ScoreboardHazardRecognizer(II, DAG, "misched"); -} - -// Default implementation of CreateTargetPostRAHazardRecognizer. -ScheduleHazardRecognizer *TargetInstrInfoImpl:: -CreateTargetPostRAHazardRecognizer(const InstrItineraryData *II, - const ScheduleDAG *DAG) const { - return (ScheduleHazardRecognizer *) - new ScoreboardHazardRecognizer(II, DAG, "post-RA-sched"); -} - -//===----------------------------------------------------------------------===// -// SelectionDAG latency interface. 
-//===----------------------------------------------------------------------===// - -int -TargetInstrInfoImpl::getOperandLatency(const InstrItineraryData *ItinData, - SDNode *DefNode, unsigned DefIdx, - SDNode *UseNode, unsigned UseIdx) const { - if (!ItinData || ItinData->isEmpty()) - return -1; - - if (!DefNode->isMachineOpcode()) - return -1; - - unsigned DefClass = get(DefNode->getMachineOpcode()).getSchedClass(); - if (!UseNode->isMachineOpcode()) - return ItinData->getOperandCycle(DefClass, DefIdx); - unsigned UseClass = get(UseNode->getMachineOpcode()).getSchedClass(); - return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); -} - -int TargetInstrInfoImpl::getInstrLatency(const InstrItineraryData *ItinData, - SDNode *N) const { - if (!ItinData || ItinData->isEmpty()) - return 1; - - if (!N->isMachineOpcode()) - return 1; - - return ItinData->getStageLatency(get(N->getMachineOpcode()).getSchedClass()); -} - -//===----------------------------------------------------------------------===// -// MachineInstr latency interface. -//===----------------------------------------------------------------------===// - -unsigned -TargetInstrInfoImpl::getNumMicroOps(const InstrItineraryData *ItinData, - const MachineInstr *MI) const { - if (!ItinData || ItinData->isEmpty()) - return 1; - - unsigned Class = MI->getDesc().getSchedClass(); - int UOps = ItinData->Itineraries[Class].NumMicroOps; - if (UOps >= 0) - return UOps; - - // The # of u-ops is dynamically determined. The specific target should - // override this function to return the right number. - return 1; -} - -/// Return the default expected latency for a def based on it's opcode. -unsigned TargetInstrInfo::defaultDefLatency(const MCSchedModel *SchedModel, - const MachineInstr *DefMI) const { - if (DefMI->isTransient()) - return 0; - if (DefMI->mayLoad()) - return SchedModel->LoadLatency; - if (isHighLatencyDef(DefMI->getOpcode())) - return SchedModel->HighLatency; - return 1; -} - -unsigned TargetInstrInfoImpl:: -getInstrLatency(const InstrItineraryData *ItinData, - const MachineInstr *MI, - unsigned *PredCost) const { - // Default to one cycle for no itinerary. However, an "empty" itinerary may - // still have a MinLatency property, which getStageLatency checks. - if (!ItinData) - return MI->mayLoad() ? 2 : 1; - - return ItinData->getStageLatency(MI->getDesc().getSchedClass()); -} - -bool TargetInstrInfoImpl::hasLowDefLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, - unsigned DefIdx) const { - if (!ItinData || ItinData->isEmpty()) - return false; - - unsigned DefClass = DefMI->getDesc().getSchedClass(); - int DefCycle = ItinData->getOperandCycle(DefClass, DefIdx); - return (DefCycle != -1 && DefCycle <= 1); -} - -/// Both DefMI and UseMI must be valid. By default, call directly to the -/// itinerary. This may be overriden by the target. -int TargetInstrInfoImpl:: -getOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx) const { - unsigned DefClass = DefMI->getDesc().getSchedClass(); - unsigned UseClass = UseMI->getDesc().getSchedClass(); - return ItinData->getOperandLatency(DefClass, DefIdx, UseClass, UseIdx); -} - -/// If we can determine the operand latency from the def only, without itinerary -/// lookup, do so. Otherwise return -1. 
-int TargetInstrInfo::computeDefOperandLatency( - const InstrItineraryData *ItinData, - const MachineInstr *DefMI, bool FindMin) const { - - // Let the target hook getInstrLatency handle missing itineraries. - if (!ItinData) - return getInstrLatency(ItinData, DefMI); - - // Return a latency based on the itinerary properties and defining instruction - // if possible. Some common subtargets don't require per-operand latency, - // especially for minimum latencies. - if (FindMin) { - // If MinLatency is valid, call getInstrLatency. This uses Stage latency if - // it exists before defaulting to MinLatency. - if (ItinData->SchedModel->MinLatency >= 0) - return getInstrLatency(ItinData, DefMI); - - // If MinLatency is invalid, OperandLatency is interpreted as MinLatency. - // For empty itineraries, short-cirtuit the check and default to one cycle. - if (ItinData->isEmpty()) - return 1; - } - else if(ItinData->isEmpty()) - return defaultDefLatency(ItinData->SchedModel, DefMI); - - // ...operand lookup required - return -1; -} - -/// computeOperandLatency - Compute and return the latency of the given data -/// dependent def and use when the operand indices are already known. UseMI may -/// be NULL for an unknown use. -/// -/// FindMin may be set to get the minimum vs. expected latency. Minimum -/// latency is used for scheduling groups, while expected latency is for -/// instruction cost and critical path. -/// -/// Depending on the subtarget's itinerary properties, this may or may not need -/// to call getOperandLatency(). For most subtargets, we don't need DefIdx or -/// UseIdx to compute min latency. -unsigned TargetInstrInfo:: -computeOperandLatency(const InstrItineraryData *ItinData, - const MachineInstr *DefMI, unsigned DefIdx, - const MachineInstr *UseMI, unsigned UseIdx, - bool FindMin) const { - - int DefLatency = computeDefOperandLatency(ItinData, DefMI, FindMin); - if (DefLatency >= 0) - return DefLatency; - - assert(ItinData && !ItinData->isEmpty() && "computeDefOperandLatency fail"); - - int OperLatency = 0; - if (UseMI) - OperLatency = getOperandLatency(ItinData, DefMI, DefIdx, UseMI, UseIdx); - else { - unsigned DefClass = DefMI->getDesc().getSchedClass(); - OperLatency = ItinData->getOperandCycle(DefClass, DefIdx); - } - if (OperLatency >= 0) - return OperLatency; - - // No operand latency was found. - unsigned InstrLatency = getInstrLatency(ItinData, DefMI); - - // Expected latency is the max of the stage latency and itinerary props. - if (!FindMin) - InstrLatency = std::max(InstrLatency, - defaultDefLatency(ItinData->SchedModel, DefMI)); - return InstrLatency; -} diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp new file mode 100644 index 000000000000..f42bdbd27643 --- /dev/null +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -0,0 +1,1305 @@ +//===-- TargetLoweringBase.cpp - Implement the TargetLoweringBase class ---===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This implements the TargetLoweringBase class. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetLowering.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Triple.h" +#include "llvm/CodeGen/Analysis.h" +#include "llvm/CodeGen/MachineFrameInfo.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineJumpTableInfo.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/MC/MCAsmInfo.h" +#include "llvm/MC/MCExpr.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/MathExtras.h" +#include "llvm/Target/TargetLoweringObjectFile.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" +#include +using namespace llvm; + +/// InitLibcallNames - Set default libcall names. +/// +static void InitLibcallNames(const char **Names, const TargetMachine &TM) { + Names[RTLIB::SHL_I16] = "__ashlhi3"; + Names[RTLIB::SHL_I32] = "__ashlsi3"; + Names[RTLIB::SHL_I64] = "__ashldi3"; + Names[RTLIB::SHL_I128] = "__ashlti3"; + Names[RTLIB::SRL_I16] = "__lshrhi3"; + Names[RTLIB::SRL_I32] = "__lshrsi3"; + Names[RTLIB::SRL_I64] = "__lshrdi3"; + Names[RTLIB::SRL_I128] = "__lshrti3"; + Names[RTLIB::SRA_I16] = "__ashrhi3"; + Names[RTLIB::SRA_I32] = "__ashrsi3"; + Names[RTLIB::SRA_I64] = "__ashrdi3"; + Names[RTLIB::SRA_I128] = "__ashrti3"; + Names[RTLIB::MUL_I8] = "__mulqi3"; + Names[RTLIB::MUL_I16] = "__mulhi3"; + Names[RTLIB::MUL_I32] = "__mulsi3"; + Names[RTLIB::MUL_I64] = "__muldi3"; + Names[RTLIB::MUL_I128] = "__multi3"; + Names[RTLIB::MULO_I32] = "__mulosi4"; + Names[RTLIB::MULO_I64] = "__mulodi4"; + Names[RTLIB::MULO_I128] = "__muloti4"; + Names[RTLIB::SDIV_I8] = "__divqi3"; + Names[RTLIB::SDIV_I16] = "__divhi3"; + Names[RTLIB::SDIV_I32] = "__divsi3"; + Names[RTLIB::SDIV_I64] = "__divdi3"; + Names[RTLIB::SDIV_I128] = "__divti3"; + Names[RTLIB::UDIV_I8] = "__udivqi3"; + Names[RTLIB::UDIV_I16] = "__udivhi3"; + Names[RTLIB::UDIV_I32] = "__udivsi3"; + Names[RTLIB::UDIV_I64] = "__udivdi3"; + Names[RTLIB::UDIV_I128] = "__udivti3"; + Names[RTLIB::SREM_I8] = "__modqi3"; + Names[RTLIB::SREM_I16] = "__modhi3"; + Names[RTLIB::SREM_I32] = "__modsi3"; + Names[RTLIB::SREM_I64] = "__moddi3"; + Names[RTLIB::SREM_I128] = "__modti3"; + Names[RTLIB::UREM_I8] = "__umodqi3"; + Names[RTLIB::UREM_I16] = "__umodhi3"; + Names[RTLIB::UREM_I32] = "__umodsi3"; + Names[RTLIB::UREM_I64] = "__umoddi3"; + Names[RTLIB::UREM_I128] = "__umodti3"; + + // These are generally not available. 
+ Names[RTLIB::SDIVREM_I8] = 0; + Names[RTLIB::SDIVREM_I16] = 0; + Names[RTLIB::SDIVREM_I32] = 0; + Names[RTLIB::SDIVREM_I64] = 0; + Names[RTLIB::SDIVREM_I128] = 0; + Names[RTLIB::UDIVREM_I8] = 0; + Names[RTLIB::UDIVREM_I16] = 0; + Names[RTLIB::UDIVREM_I32] = 0; + Names[RTLIB::UDIVREM_I64] = 0; + Names[RTLIB::UDIVREM_I128] = 0; + + Names[RTLIB::NEG_I32] = "__negsi2"; + Names[RTLIB::NEG_I64] = "__negdi2"; + Names[RTLIB::ADD_F32] = "__addsf3"; + Names[RTLIB::ADD_F64] = "__adddf3"; + Names[RTLIB::ADD_F80] = "__addxf3"; + Names[RTLIB::ADD_F128] = "__addtf3"; + Names[RTLIB::ADD_PPCF128] = "__gcc_qadd"; + Names[RTLIB::SUB_F32] = "__subsf3"; + Names[RTLIB::SUB_F64] = "__subdf3"; + Names[RTLIB::SUB_F80] = "__subxf3"; + Names[RTLIB::SUB_F128] = "__subtf3"; + Names[RTLIB::SUB_PPCF128] = "__gcc_qsub"; + Names[RTLIB::MUL_F32] = "__mulsf3"; + Names[RTLIB::MUL_F64] = "__muldf3"; + Names[RTLIB::MUL_F80] = "__mulxf3"; + Names[RTLIB::MUL_F128] = "__multf3"; + Names[RTLIB::MUL_PPCF128] = "__gcc_qmul"; + Names[RTLIB::DIV_F32] = "__divsf3"; + Names[RTLIB::DIV_F64] = "__divdf3"; + Names[RTLIB::DIV_F80] = "__divxf3"; + Names[RTLIB::DIV_F128] = "__divtf3"; + Names[RTLIB::DIV_PPCF128] = "__gcc_qdiv"; + Names[RTLIB::REM_F32] = "fmodf"; + Names[RTLIB::REM_F64] = "fmod"; + Names[RTLIB::REM_F80] = "fmodl"; + Names[RTLIB::REM_F128] = "fmodl"; + Names[RTLIB::REM_PPCF128] = "fmodl"; + Names[RTLIB::FMA_F32] = "fmaf"; + Names[RTLIB::FMA_F64] = "fma"; + Names[RTLIB::FMA_F80] = "fmal"; + Names[RTLIB::FMA_F128] = "fmal"; + Names[RTLIB::FMA_PPCF128] = "fmal"; + Names[RTLIB::POWI_F32] = "__powisf2"; + Names[RTLIB::POWI_F64] = "__powidf2"; + Names[RTLIB::POWI_F80] = "__powixf2"; + Names[RTLIB::POWI_F128] = "__powitf2"; + Names[RTLIB::POWI_PPCF128] = "__powitf2"; + Names[RTLIB::SQRT_F32] = "sqrtf"; + Names[RTLIB::SQRT_F64] = "sqrt"; + Names[RTLIB::SQRT_F80] = "sqrtl"; + Names[RTLIB::SQRT_F128] = "sqrtl"; + Names[RTLIB::SQRT_PPCF128] = "sqrtl"; + Names[RTLIB::LOG_F32] = "logf"; + Names[RTLIB::LOG_F64] = "log"; + Names[RTLIB::LOG_F80] = "logl"; + Names[RTLIB::LOG_F128] = "logl"; + Names[RTLIB::LOG_PPCF128] = "logl"; + Names[RTLIB::LOG2_F32] = "log2f"; + Names[RTLIB::LOG2_F64] = "log2"; + Names[RTLIB::LOG2_F80] = "log2l"; + Names[RTLIB::LOG2_F128] = "log2l"; + Names[RTLIB::LOG2_PPCF128] = "log2l"; + Names[RTLIB::LOG10_F32] = "log10f"; + Names[RTLIB::LOG10_F64] = "log10"; + Names[RTLIB::LOG10_F80] = "log10l"; + Names[RTLIB::LOG10_F128] = "log10l"; + Names[RTLIB::LOG10_PPCF128] = "log10l"; + Names[RTLIB::EXP_F32] = "expf"; + Names[RTLIB::EXP_F64] = "exp"; + Names[RTLIB::EXP_F80] = "expl"; + Names[RTLIB::EXP_F128] = "expl"; + Names[RTLIB::EXP_PPCF128] = "expl"; + Names[RTLIB::EXP2_F32] = "exp2f"; + Names[RTLIB::EXP2_F64] = "exp2"; + Names[RTLIB::EXP2_F80] = "exp2l"; + Names[RTLIB::EXP2_F128] = "exp2l"; + Names[RTLIB::EXP2_PPCF128] = "exp2l"; + Names[RTLIB::SIN_F32] = "sinf"; + Names[RTLIB::SIN_F64] = "sin"; + Names[RTLIB::SIN_F80] = "sinl"; + Names[RTLIB::SIN_F128] = "sinl"; + Names[RTLIB::SIN_PPCF128] = "sinl"; + Names[RTLIB::COS_F32] = "cosf"; + Names[RTLIB::COS_F64] = "cos"; + Names[RTLIB::COS_F80] = "cosl"; + Names[RTLIB::COS_F128] = "cosl"; + Names[RTLIB::COS_PPCF128] = "cosl"; + Names[RTLIB::POW_F32] = "powf"; + Names[RTLIB::POW_F64] = "pow"; + Names[RTLIB::POW_F80] = "powl"; + Names[RTLIB::POW_F128] = "powl"; + Names[RTLIB::POW_PPCF128] = "powl"; + Names[RTLIB::CEIL_F32] = "ceilf"; + Names[RTLIB::CEIL_F64] = "ceil"; + Names[RTLIB::CEIL_F80] = "ceill"; + Names[RTLIB::CEIL_F128] = "ceill"; + Names[RTLIB::CEIL_PPCF128] = "ceill"; 
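// The libm-style entries above and below all follow one suffix rule: "f" for
// f32, no suffix for f64, and "l" for the long-double-like types (f80, f128,
// ppcf128). A minimal sketch of that rule, with hypothetical names:

enum SketchFPWidth { SketchF32, SketchF64, SketchLongDouble };

static const char *sketchLibmSuffix(SketchFPWidth Width) {
  switch (Width) {
  case SketchF32:        return "f"; // ceilf, truncf, sqrtf, ...
  case SketchF64:        return "";  // ceil, trunc, sqrt, ...
  case SketchLongDouble: return "l"; // ceill, truncl, sqrtl, ...
  }
  return "";
}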
+ Names[RTLIB::TRUNC_F32] = "truncf"; + Names[RTLIB::TRUNC_F64] = "trunc"; + Names[RTLIB::TRUNC_F80] = "truncl"; + Names[RTLIB::TRUNC_F128] = "truncl"; + Names[RTLIB::TRUNC_PPCF128] = "truncl"; + Names[RTLIB::RINT_F32] = "rintf"; + Names[RTLIB::RINT_F64] = "rint"; + Names[RTLIB::RINT_F80] = "rintl"; + Names[RTLIB::RINT_F128] = "rintl"; + Names[RTLIB::RINT_PPCF128] = "rintl"; + Names[RTLIB::NEARBYINT_F32] = "nearbyintf"; + Names[RTLIB::NEARBYINT_F64] = "nearbyint"; + Names[RTLIB::NEARBYINT_F80] = "nearbyintl"; + Names[RTLIB::NEARBYINT_F128] = "nearbyintl"; + Names[RTLIB::NEARBYINT_PPCF128] = "nearbyintl"; + Names[RTLIB::FLOOR_F32] = "floorf"; + Names[RTLIB::FLOOR_F64] = "floor"; + Names[RTLIB::FLOOR_F80] = "floorl"; + Names[RTLIB::FLOOR_F128] = "floorl"; + Names[RTLIB::FLOOR_PPCF128] = "floorl"; + Names[RTLIB::COPYSIGN_F32] = "copysignf"; + Names[RTLIB::COPYSIGN_F64] = "copysign"; + Names[RTLIB::COPYSIGN_F80] = "copysignl"; + Names[RTLIB::COPYSIGN_F128] = "copysignl"; + Names[RTLIB::COPYSIGN_PPCF128] = "copysignl"; + Names[RTLIB::FPEXT_F64_F128] = "__extenddftf2"; + Names[RTLIB::FPEXT_F32_F128] = "__extendsftf2"; + Names[RTLIB::FPEXT_F32_F64] = "__extendsfdf2"; + Names[RTLIB::FPEXT_F16_F32] = "__gnu_h2f_ieee"; + Names[RTLIB::FPROUND_F32_F16] = "__gnu_f2h_ieee"; + Names[RTLIB::FPROUND_F64_F32] = "__truncdfsf2"; + Names[RTLIB::FPROUND_F80_F32] = "__truncxfsf2"; + Names[RTLIB::FPROUND_F128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_PPCF128_F32] = "__trunctfsf2"; + Names[RTLIB::FPROUND_F80_F64] = "__truncxfdf2"; + Names[RTLIB::FPROUND_F128_F64] = "__trunctfdf2"; + Names[RTLIB::FPROUND_PPCF128_F64] = "__trunctfdf2"; + Names[RTLIB::FPTOSINT_F32_I8] = "__fixsfqi"; + Names[RTLIB::FPTOSINT_F32_I16] = "__fixsfhi"; + Names[RTLIB::FPTOSINT_F32_I32] = "__fixsfsi"; + Names[RTLIB::FPTOSINT_F32_I64] = "__fixsfdi"; + Names[RTLIB::FPTOSINT_F32_I128] = "__fixsfti"; + Names[RTLIB::FPTOSINT_F64_I8] = "__fixdfqi"; + Names[RTLIB::FPTOSINT_F64_I16] = "__fixdfhi"; + Names[RTLIB::FPTOSINT_F64_I32] = "__fixdfsi"; + Names[RTLIB::FPTOSINT_F64_I64] = "__fixdfdi"; + Names[RTLIB::FPTOSINT_F64_I128] = "__fixdfti"; + Names[RTLIB::FPTOSINT_F80_I32] = "__fixxfsi"; + Names[RTLIB::FPTOSINT_F80_I64] = "__fixxfdi"; + Names[RTLIB::FPTOSINT_F80_I128] = "__fixxfti"; + Names[RTLIB::FPTOSINT_F128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_F128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_F128_I128] = "__fixtfti"; + Names[RTLIB::FPTOSINT_PPCF128_I32] = "__fixtfsi"; + Names[RTLIB::FPTOSINT_PPCF128_I64] = "__fixtfdi"; + Names[RTLIB::FPTOSINT_PPCF128_I128] = "__fixtfti"; + Names[RTLIB::FPTOUINT_F32_I8] = "__fixunssfqi"; + Names[RTLIB::FPTOUINT_F32_I16] = "__fixunssfhi"; + Names[RTLIB::FPTOUINT_F32_I32] = "__fixunssfsi"; + Names[RTLIB::FPTOUINT_F32_I64] = "__fixunssfdi"; + Names[RTLIB::FPTOUINT_F32_I128] = "__fixunssfti"; + Names[RTLIB::FPTOUINT_F64_I8] = "__fixunsdfqi"; + Names[RTLIB::FPTOUINT_F64_I16] = "__fixunsdfhi"; + Names[RTLIB::FPTOUINT_F64_I32] = "__fixunsdfsi"; + Names[RTLIB::FPTOUINT_F64_I64] = "__fixunsdfdi"; + Names[RTLIB::FPTOUINT_F64_I128] = "__fixunsdfti"; + Names[RTLIB::FPTOUINT_F80_I32] = "__fixunsxfsi"; + Names[RTLIB::FPTOUINT_F80_I64] = "__fixunsxfdi"; + Names[RTLIB::FPTOUINT_F80_I128] = "__fixunsxfti"; + Names[RTLIB::FPTOUINT_F128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_F128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_F128_I128] = "__fixunstfti"; + Names[RTLIB::FPTOUINT_PPCF128_I32] = "__fixunstfsi"; + Names[RTLIB::FPTOUINT_PPCF128_I64] = "__fixunstfdi"; + Names[RTLIB::FPTOUINT_PPCF128_I128] = 
"__fixunstfti"; + Names[RTLIB::SINTTOFP_I32_F32] = "__floatsisf"; + Names[RTLIB::SINTTOFP_I32_F64] = "__floatsidf"; + Names[RTLIB::SINTTOFP_I32_F80] = "__floatsixf"; + Names[RTLIB::SINTTOFP_I32_F128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I32_PPCF128] = "__floatsitf"; + Names[RTLIB::SINTTOFP_I64_F32] = "__floatdisf"; + Names[RTLIB::SINTTOFP_I64_F64] = "__floatdidf"; + Names[RTLIB::SINTTOFP_I64_F80] = "__floatdixf"; + Names[RTLIB::SINTTOFP_I64_F128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I64_PPCF128] = "__floatditf"; + Names[RTLIB::SINTTOFP_I128_F32] = "__floattisf"; + Names[RTLIB::SINTTOFP_I128_F64] = "__floattidf"; + Names[RTLIB::SINTTOFP_I128_F80] = "__floattixf"; + Names[RTLIB::SINTTOFP_I128_F128] = "__floattitf"; + Names[RTLIB::SINTTOFP_I128_PPCF128] = "__floattitf"; + Names[RTLIB::UINTTOFP_I32_F32] = "__floatunsisf"; + Names[RTLIB::UINTTOFP_I32_F64] = "__floatunsidf"; + Names[RTLIB::UINTTOFP_I32_F80] = "__floatunsixf"; + Names[RTLIB::UINTTOFP_I32_F128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I32_PPCF128] = "__floatunsitf"; + Names[RTLIB::UINTTOFP_I64_F32] = "__floatundisf"; + Names[RTLIB::UINTTOFP_I64_F64] = "__floatundidf"; + Names[RTLIB::UINTTOFP_I64_F80] = "__floatundixf"; + Names[RTLIB::UINTTOFP_I64_F128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I64_PPCF128] = "__floatunditf"; + Names[RTLIB::UINTTOFP_I128_F32] = "__floatuntisf"; + Names[RTLIB::UINTTOFP_I128_F64] = "__floatuntidf"; + Names[RTLIB::UINTTOFP_I128_F80] = "__floatuntixf"; + Names[RTLIB::UINTTOFP_I128_F128] = "__floatuntitf"; + Names[RTLIB::UINTTOFP_I128_PPCF128] = "__floatuntitf"; + Names[RTLIB::OEQ_F32] = "__eqsf2"; + Names[RTLIB::OEQ_F64] = "__eqdf2"; + Names[RTLIB::OEQ_F128] = "__eqtf2"; + Names[RTLIB::UNE_F32] = "__nesf2"; + Names[RTLIB::UNE_F64] = "__nedf2"; + Names[RTLIB::UNE_F128] = "__netf2"; + Names[RTLIB::OGE_F32] = "__gesf2"; + Names[RTLIB::OGE_F64] = "__gedf2"; + Names[RTLIB::OGE_F128] = "__getf2"; + Names[RTLIB::OLT_F32] = "__ltsf2"; + Names[RTLIB::OLT_F64] = "__ltdf2"; + Names[RTLIB::OLT_F128] = "__lttf2"; + Names[RTLIB::OLE_F32] = "__lesf2"; + Names[RTLIB::OLE_F64] = "__ledf2"; + Names[RTLIB::OLE_F128] = "__letf2"; + Names[RTLIB::OGT_F32] = "__gtsf2"; + Names[RTLIB::OGT_F64] = "__gtdf2"; + Names[RTLIB::OGT_F128] = "__gttf2"; + Names[RTLIB::UO_F32] = "__unordsf2"; + Names[RTLIB::UO_F64] = "__unorddf2"; + Names[RTLIB::UO_F128] = "__unordtf2"; + Names[RTLIB::O_F32] = "__unordsf2"; + Names[RTLIB::O_F64] = "__unorddf2"; + Names[RTLIB::O_F128] = "__unordtf2"; + Names[RTLIB::MEMCPY] = "memcpy"; + Names[RTLIB::MEMMOVE] = "memmove"; + Names[RTLIB::MEMSET] = "memset"; + Names[RTLIB::UNWIND_RESUME] = "_Unwind_Resume"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_1] = "__sync_val_compare_and_swap_1"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_2] = "__sync_val_compare_and_swap_2"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_4] = "__sync_val_compare_and_swap_4"; + Names[RTLIB::SYNC_VAL_COMPARE_AND_SWAP_8] = "__sync_val_compare_and_swap_8"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_1] = "__sync_lock_test_and_set_1"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_2] = "__sync_lock_test_and_set_2"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_4] = "__sync_lock_test_and_set_4"; + Names[RTLIB::SYNC_LOCK_TEST_AND_SET_8] = "__sync_lock_test_and_set_8"; + Names[RTLIB::SYNC_FETCH_AND_ADD_1] = "__sync_fetch_and_add_1"; + Names[RTLIB::SYNC_FETCH_AND_ADD_2] = "__sync_fetch_and_add_2"; + Names[RTLIB::SYNC_FETCH_AND_ADD_4] = "__sync_fetch_and_add_4"; + Names[RTLIB::SYNC_FETCH_AND_ADD_8] = "__sync_fetch_and_add_8"; + Names[RTLIB::SYNC_FETCH_AND_SUB_1] = 
"__sync_fetch_and_sub_1"; + Names[RTLIB::SYNC_FETCH_AND_SUB_2] = "__sync_fetch_and_sub_2"; + Names[RTLIB::SYNC_FETCH_AND_SUB_4] = "__sync_fetch_and_sub_4"; + Names[RTLIB::SYNC_FETCH_AND_SUB_8] = "__sync_fetch_and_sub_8"; + Names[RTLIB::SYNC_FETCH_AND_AND_1] = "__sync_fetch_and_and_1"; + Names[RTLIB::SYNC_FETCH_AND_AND_2] = "__sync_fetch_and_and_2"; + Names[RTLIB::SYNC_FETCH_AND_AND_4] = "__sync_fetch_and_and_4"; + Names[RTLIB::SYNC_FETCH_AND_AND_8] = "__sync_fetch_and_and_8"; + Names[RTLIB::SYNC_FETCH_AND_OR_1] = "__sync_fetch_and_or_1"; + Names[RTLIB::SYNC_FETCH_AND_OR_2] = "__sync_fetch_and_or_2"; + Names[RTLIB::SYNC_FETCH_AND_OR_4] = "__sync_fetch_and_or_4"; + Names[RTLIB::SYNC_FETCH_AND_OR_8] = "__sync_fetch_and_or_8"; + Names[RTLIB::SYNC_FETCH_AND_XOR_1] = "__sync_fetch_and_xor_1"; + Names[RTLIB::SYNC_FETCH_AND_XOR_2] = "__sync_fetch_and_xor_2"; + Names[RTLIB::SYNC_FETCH_AND_XOR_4] = "__sync_fetch_and_xor_4"; + Names[RTLIB::SYNC_FETCH_AND_XOR_8] = "__sync_fetch_and_xor_8"; + Names[RTLIB::SYNC_FETCH_AND_NAND_1] = "__sync_fetch_and_nand_1"; + Names[RTLIB::SYNC_FETCH_AND_NAND_2] = "__sync_fetch_and_nand_2"; + Names[RTLIB::SYNC_FETCH_AND_NAND_4] = "__sync_fetch_and_nand_4"; + Names[RTLIB::SYNC_FETCH_AND_NAND_8] = "__sync_fetch_and_nand_8"; + + if (Triple(TM.getTargetTriple()).getEnvironment() == Triple::GNU) { + Names[RTLIB::SINCOS_F32] = "sincosf"; + Names[RTLIB::SINCOS_F64] = "sincos"; + Names[RTLIB::SINCOS_F80] = "sincosl"; + Names[RTLIB::SINCOS_F128] = "sincosl"; + Names[RTLIB::SINCOS_PPCF128] = "sincosl"; + } else { + // These are generally not available. + Names[RTLIB::SINCOS_F32] = 0; + Names[RTLIB::SINCOS_F64] = 0; + Names[RTLIB::SINCOS_F80] = 0; + Names[RTLIB::SINCOS_F128] = 0; + Names[RTLIB::SINCOS_PPCF128] = 0; + } +} + +/// InitLibcallCallingConvs - Set default libcall CallingConvs. +/// +static void InitLibcallCallingConvs(CallingConv::ID *CCs) { + for (int i = 0; i < RTLIB::UNKNOWN_LIBCALL; ++i) { + CCs[i] = CallingConv::C; + } +} + +/// getFPEXT - Return the FPEXT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPEXT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::f64) + return FPEXT_F32_F64; + if (RetVT == MVT::f128) + return FPEXT_F32_F128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::f128) + return FPEXT_F64_F128; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPROUND - Return the FPROUND_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPROUND(EVT OpVT, EVT RetVT) { + if (RetVT == MVT::f32) { + if (OpVT == MVT::f64) + return FPROUND_F64_F32; + if (OpVT == MVT::f80) + return FPROUND_F80_F32; + if (OpVT == MVT::f128) + return FPROUND_F128_F32; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F32; + } else if (RetVT == MVT::f64) { + if (OpVT == MVT::f80) + return FPROUND_F80_F64; + if (OpVT == MVT::f128) + return FPROUND_F128_F64; + if (OpVT == MVT::ppcf128) + return FPROUND_PPCF128_F64; + } + + return UNKNOWN_LIBCALL; +} + +/// getFPTOSINT - Return the FPTOSINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getFPTOSINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOSINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOSINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOSINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOSINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOSINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOSINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOSINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOSINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOSINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getFPTOUINT - Return the FPTOUINT_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getFPTOUINT(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::f32) { + if (RetVT == MVT::i8) + return FPTOUINT_F32_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F32_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F32_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F32_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F32_I128; + } else if (OpVT == MVT::f64) { + if (RetVT == MVT::i8) + return FPTOUINT_F64_I8; + if (RetVT == MVT::i16) + return FPTOUINT_F64_I16; + if (RetVT == MVT::i32) + return FPTOUINT_F64_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F64_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F64_I128; + } else if (OpVT == MVT::f80) { + if (RetVT == MVT::i32) + return FPTOUINT_F80_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F80_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F80_I128; + } else if (OpVT == MVT::f128) { + if (RetVT == MVT::i32) + return FPTOUINT_F128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_F128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_F128_I128; + } else if (OpVT == MVT::ppcf128) { + if (RetVT == MVT::i32) + return FPTOUINT_PPCF128_I32; + if (RetVT == MVT::i64) + return FPTOUINT_PPCF128_I64; + if (RetVT == MVT::i128) + return FPTOUINT_PPCF128_I128; + } + return UNKNOWN_LIBCALL; +} + +/// getSINTTOFP - Return the SINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. 
+RTLIB::Libcall RTLIB::getSINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return SINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return SINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return SINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return SINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return SINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return SINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return SINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// getUINTTOFP - Return the UINTTOFP_*_* value for the given types, or +/// UNKNOWN_LIBCALL if there is none. +RTLIB::Libcall RTLIB::getUINTTOFP(EVT OpVT, EVT RetVT) { + if (OpVT == MVT::i32) { + if (RetVT == MVT::f32) + return UINTTOFP_I32_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I32_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I32_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I32_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I32_PPCF128; + } else if (OpVT == MVT::i64) { + if (RetVT == MVT::f32) + return UINTTOFP_I64_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I64_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I64_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I64_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I64_PPCF128; + } else if (OpVT == MVT::i128) { + if (RetVT == MVT::f32) + return UINTTOFP_I128_F32; + if (RetVT == MVT::f64) + return UINTTOFP_I128_F64; + if (RetVT == MVT::f80) + return UINTTOFP_I128_F80; + if (RetVT == MVT::f128) + return UINTTOFP_I128_F128; + if (RetVT == MVT::ppcf128) + return UINTTOFP_I128_PPCF128; + } + return UNKNOWN_LIBCALL; +} + +/// InitCmpLibcallCCs - Set default comparison libcall CC. +/// +static void InitCmpLibcallCCs(ISD::CondCode *CCs) { + memset(CCs, ISD::SETCC_INVALID, sizeof(ISD::CondCode)*RTLIB::UNKNOWN_LIBCALL); + CCs[RTLIB::OEQ_F32] = ISD::SETEQ; + CCs[RTLIB::OEQ_F64] = ISD::SETEQ; + CCs[RTLIB::OEQ_F128] = ISD::SETEQ; + CCs[RTLIB::UNE_F32] = ISD::SETNE; + CCs[RTLIB::UNE_F64] = ISD::SETNE; + CCs[RTLIB::UNE_F128] = ISD::SETNE; + CCs[RTLIB::OGE_F32] = ISD::SETGE; + CCs[RTLIB::OGE_F64] = ISD::SETGE; + CCs[RTLIB::OGE_F128] = ISD::SETGE; + CCs[RTLIB::OLT_F32] = ISD::SETLT; + CCs[RTLIB::OLT_F64] = ISD::SETLT; + CCs[RTLIB::OLT_F128] = ISD::SETLT; + CCs[RTLIB::OLE_F32] = ISD::SETLE; + CCs[RTLIB::OLE_F64] = ISD::SETLE; + CCs[RTLIB::OLE_F128] = ISD::SETLE; + CCs[RTLIB::OGT_F32] = ISD::SETGT; + CCs[RTLIB::OGT_F64] = ISD::SETGT; + CCs[RTLIB::OGT_F128] = ISD::SETGT; + CCs[RTLIB::UO_F32] = ISD::SETNE; + CCs[RTLIB::UO_F64] = ISD::SETNE; + CCs[RTLIB::UO_F128] = ISD::SETNE; + CCs[RTLIB::O_F32] = ISD::SETEQ; + CCs[RTLIB::O_F64] = ISD::SETEQ; + CCs[RTLIB::O_F128] = ISD::SETEQ; +} + +/// NOTE: The constructor takes ownership of TLOF. +TargetLoweringBase::TargetLoweringBase(const TargetMachine &tm, + const TargetLoweringObjectFile *tlof) + : TM(tm), TD(TM.getDataLayout()), TLOF(*tlof) { + // All operations default to being supported. 
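// (Zeroing these tables makes every (opcode, value type) pair default to
// Legal; the setOperationAction calls that follow then selectively mark
// entries Expand so the legalizer rewrites them in terms of simpler
// operations or library calls.)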
+ memset(OpActions, 0, sizeof(OpActions)); + memset(LoadExtActions, 0, sizeof(LoadExtActions)); + memset(TruncStoreActions, 0, sizeof(TruncStoreActions)); + memset(IndexedModeActions, 0, sizeof(IndexedModeActions)); + memset(CondCodeActions, 0, sizeof(CondCodeActions)); + + // Set default actions for various operations. + for (unsigned VT = 0; VT != (unsigned)MVT::LAST_VALUETYPE; ++VT) { + // Default all indexed load / store to expand. + for (unsigned IM = (unsigned)ISD::PRE_INC; + IM != (unsigned)ISD::LAST_INDEXED_MODE; ++IM) { + setIndexedLoadAction(IM, (MVT::SimpleValueType)VT, Expand); + setIndexedStoreAction(IM, (MVT::SimpleValueType)VT, Expand); + } + + // These operations default to expand. + setOperationAction(ISD::FGETSIGN, (MVT::SimpleValueType)VT, Expand); + setOperationAction(ISD::CONCAT_VECTORS, (MVT::SimpleValueType)VT, Expand); + } + + // Most targets ignore the @llvm.prefetch intrinsic. + setOperationAction(ISD::PREFETCH, MVT::Other, Expand); + + // ConstantFP nodes default to expand. Targets can either change this to + // Legal, in which case all fp constants are legal, or use isFPImmLegal() + // to optimize expansions for certain constants. + setOperationAction(ISD::ConstantFP, MVT::f16, Expand); + setOperationAction(ISD::ConstantFP, MVT::f32, Expand); + setOperationAction(ISD::ConstantFP, MVT::f64, Expand); + setOperationAction(ISD::ConstantFP, MVT::f80, Expand); + setOperationAction(ISD::ConstantFP, MVT::f128, Expand); + + // These library functions default to expand. + setOperationAction(ISD::FLOG , MVT::f16, Expand); + setOperationAction(ISD::FLOG2, MVT::f16, Expand); + setOperationAction(ISD::FLOG10, MVT::f16, Expand); + setOperationAction(ISD::FEXP , MVT::f16, Expand); + setOperationAction(ISD::FEXP2, MVT::f16, Expand); + setOperationAction(ISD::FFLOOR, MVT::f16, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f16, Expand); + setOperationAction(ISD::FCEIL, MVT::f16, Expand); + setOperationAction(ISD::FRINT, MVT::f16, Expand); + setOperationAction(ISD::FTRUNC, MVT::f16, Expand); + setOperationAction(ISD::FLOG , MVT::f32, Expand); + setOperationAction(ISD::FLOG2, MVT::f32, Expand); + setOperationAction(ISD::FLOG10, MVT::f32, Expand); + setOperationAction(ISD::FEXP , MVT::f32, Expand); + setOperationAction(ISD::FEXP2, MVT::f32, Expand); + setOperationAction(ISD::FFLOOR, MVT::f32, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f32, Expand); + setOperationAction(ISD::FCEIL, MVT::f32, Expand); + setOperationAction(ISD::FRINT, MVT::f32, Expand); + setOperationAction(ISD::FTRUNC, MVT::f32, Expand); + setOperationAction(ISD::FLOG , MVT::f64, Expand); + setOperationAction(ISD::FLOG2, MVT::f64, Expand); + setOperationAction(ISD::FLOG10, MVT::f64, Expand); + setOperationAction(ISD::FEXP , MVT::f64, Expand); + setOperationAction(ISD::FEXP2, MVT::f64, Expand); + setOperationAction(ISD::FFLOOR, MVT::f64, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f64, Expand); + setOperationAction(ISD::FCEIL, MVT::f64, Expand); + setOperationAction(ISD::FRINT, MVT::f64, Expand); + setOperationAction(ISD::FTRUNC, MVT::f64, Expand); + setOperationAction(ISD::FLOG , MVT::f128, Expand); + setOperationAction(ISD::FLOG2, MVT::f128, Expand); + setOperationAction(ISD::FLOG10, MVT::f128, Expand); + setOperationAction(ISD::FEXP , MVT::f128, Expand); + setOperationAction(ISD::FEXP2, MVT::f128, Expand); + setOperationAction(ISD::FFLOOR, MVT::f128, Expand); + setOperationAction(ISD::FNEARBYINT, MVT::f128, Expand); + setOperationAction(ISD::FCEIL, MVT::f128, Expand); + 
setOperationAction(ISD::FRINT, MVT::f128, Expand); + setOperationAction(ISD::FTRUNC, MVT::f128, Expand); + + // Default ISD::TRAP to expand (which turns it into abort). + setOperationAction(ISD::TRAP, MVT::Other, Expand); + + // On most systems, DEBUGTRAP and TRAP have no difference. The "Expand" + // here is to inform DAG Legalizer to replace DEBUGTRAP with TRAP. + // + setOperationAction(ISD::DEBUGTRAP, MVT::Other, Expand); + + IsLittleEndian = TD->isLittleEndian(); + PointerTy = MVT::getIntegerVT(8*TD->getPointerSize(0)); + memset(RegClassForVT, 0,MVT::LAST_VALUETYPE*sizeof(TargetRegisterClass*)); + memset(TargetDAGCombineArray, 0, array_lengthof(TargetDAGCombineArray)); + MaxStoresPerMemset = MaxStoresPerMemcpy = MaxStoresPerMemmove = 8; + MaxStoresPerMemsetOptSize = MaxStoresPerMemcpyOptSize + = MaxStoresPerMemmoveOptSize = 4; + UseUnderscoreSetJmp = false; + UseUnderscoreLongJmp = false; + SelectIsExpensive = false; + IntDivIsCheap = false; + Pow2DivIsCheap = false; + JumpIsExpensive = false; + PredictableSelectIsExpensive = false; + StackPointerRegisterToSaveRestore = 0; + ExceptionPointerRegister = 0; + ExceptionSelectorRegister = 0; + BooleanContents = UndefinedBooleanContent; + BooleanVectorContents = UndefinedBooleanContent; + SchedPreferenceInfo = Sched::ILP; + JumpBufSize = 0; + JumpBufAlignment = 0; + MinFunctionAlignment = 0; + PrefFunctionAlignment = 0; + PrefLoopAlignment = 0; + MinStackArgumentAlignment = 1; + ShouldFoldAtomicFences = false; + InsertFencesForAtomic = false; + SupportJumpTables = true; + MinimumJumpTableEntries = 4; + + InitLibcallNames(LibcallRoutineNames, TM); + InitCmpLibcallCCs(CmpLibcallCCs); + InitLibcallCallingConvs(LibcallCallingConvs); +} + +TargetLoweringBase::~TargetLoweringBase() { + delete &TLOF; +} + +MVT TargetLoweringBase::getScalarShiftAmountTy(EVT LHSTy) const { + return MVT::getIntegerVT(8*TD->getPointerSize(0)); +} + +EVT TargetLoweringBase::getShiftAmountTy(EVT LHSTy) const { + assert(LHSTy.isInteger() && "Shift amount is not an integer type!"); + if (LHSTy.isVector()) + return LHSTy; + return getScalarShiftAmountTy(LHSTy); +} + +/// canOpTrap - Returns true if the operation can trap for the value type. +/// VT must be a legal type. +bool TargetLoweringBase::canOpTrap(unsigned Op, EVT VT) const { + assert(isTypeLegal(VT)); + switch (Op) { + default: + return false; + case ISD::FDIV: + case ISD::FREM: + case ISD::SDIV: + case ISD::UDIV: + case ISD::SREM: + case ISD::UREM: + return true; + } +} + + +static unsigned getVectorTypeBreakdownMVT(MVT VT, MVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT, + TargetLoweringBase *TLI) { + // Figure out the right, legal destination reg to copy into. + unsigned NumElts = VT.getVectorNumElements(); + MVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. 
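// Illustrative trace, not part of the imported patch: for VT = v8i64 on a
// target where only v2i64 is legal, the loop below halves NumElts
// 8 -> 4 -> 2 and doubles NumVectorRegs 1 -> 2 -> 4, so the value ends up
// carried in four v2i64 intermediate registers.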
+ while (NumElts > 1 && !TLI->isTypeLegal(MVT::getVectorVT(EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + MVT NewVT = MVT::getVectorVT(EltTy, NumElts); + if (!TLI->isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + MVT DestVT = TLI->getRegisterType(NewVT); + RegisterVT = DestVT; + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// isLegalRC - Return true if the value types that can be represented by the +/// specified register class are all legal. +bool TargetLoweringBase::isLegalRC(const TargetRegisterClass *RC) const { + for (TargetRegisterClass::vt_iterator I = RC->vt_begin(), E = RC->vt_end(); + I != E; ++I) { + if (isTypeLegal(*I)) + return true; + } + return false; +} + +/// findRepresentativeClass - Return the largest legal super-reg register class +/// of the register class for the specified type and its associated "cost". +std::pair +TargetLoweringBase::findRepresentativeClass(MVT VT) const { + const TargetRegisterInfo *TRI = getTargetMachine().getRegisterInfo(); + const TargetRegisterClass *RC = RegClassForVT[VT.SimpleTy]; + if (!RC) + return std::make_pair(RC, 0); + + // Compute the set of all super-register classes. + BitVector SuperRegRC(TRI->getNumRegClasses()); + for (SuperRegClassIterator RCI(RC, TRI); RCI.isValid(); ++RCI) + SuperRegRC.setBitsInMask(RCI.getMask()); + + // Find the first legal register class with the largest spill size. + const TargetRegisterClass *BestRC = RC; + for (int i = SuperRegRC.find_first(); i >= 0; i = SuperRegRC.find_next(i)) { + const TargetRegisterClass *SuperRC = TRI->getRegClass(i); + // We want the largest possible spill size. + if (SuperRC->getSize() <= BestRC->getSize()) + continue; + if (!isLegalRC(SuperRC)) + continue; + BestRC = SuperRC; + } + return std::make_pair(BestRC, 1); +} + +/// computeRegisterProperties - Once all of the register classes are added, +/// this allows us to compute derived properties we expose. +void TargetLoweringBase::computeRegisterProperties() { + assert(MVT::LAST_VALUETYPE <= MVT::MAX_ALLOWED_VALUETYPE && + "Too many value types for ValueTypeActions to hold!"); + + // Everything defaults to needing one register. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + NumRegistersForVT[i] = 1; + RegisterTypeForVT[i] = TransformToType[i] = (MVT::SimpleValueType)i; + } + // ...except isVoid, which doesn't need any registers. + NumRegistersForVT[MVT::isVoid] = 0; + + // Find the largest integer register class. + unsigned LargestIntReg = MVT::LAST_INTEGER_VALUETYPE; + for (; RegClassForVT[LargestIntReg] == 0; --LargestIntReg) + assert(LargestIntReg != MVT::i1 && "No integer registers defined!"); + + // Every integer value type larger than this largest register takes twice as + // many registers to represent as the previous ValueType. 
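// Illustrative trace, not part of the imported patch: if i32 is the largest
// legal integer type, the loop below records NumRegistersForVT[i64] = 2 and
// NumRegistersForVT[i128] = 4, keeps i32 as their register type, and marks
// both types TypeExpandInteger.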
+ for (unsigned ExpandedReg = LargestIntReg + 1; + ExpandedReg <= MVT::LAST_INTEGER_VALUETYPE; ++ExpandedReg) { + NumRegistersForVT[ExpandedReg] = 2*NumRegistersForVT[ExpandedReg-1]; + RegisterTypeForVT[ExpandedReg] = (MVT::SimpleValueType)LargestIntReg; + TransformToType[ExpandedReg] = (MVT::SimpleValueType)(ExpandedReg - 1); + ValueTypeActions.setTypeAction((MVT::SimpleValueType)ExpandedReg, + TypeExpandInteger); + } + + // Inspect all of the ValueType's smaller than the largest integer + // register to see which ones need promotion. + unsigned LegalIntReg = LargestIntReg; + for (unsigned IntReg = LargestIntReg - 1; + IntReg >= (unsigned)MVT::i1; --IntReg) { + MVT IVT = (MVT::SimpleValueType)IntReg; + if (isTypeLegal(IVT)) { + LegalIntReg = IntReg; + } else { + RegisterTypeForVT[IntReg] = TransformToType[IntReg] = + (const MVT::SimpleValueType)LegalIntReg; + ValueTypeActions.setTypeAction(IVT, TypePromoteInteger); + } + } + + // ppcf128 type is really two f64's. + if (!isTypeLegal(MVT::ppcf128)) { + NumRegistersForVT[MVT::ppcf128] = 2*NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::ppcf128] = MVT::f64; + TransformToType[MVT::ppcf128] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::ppcf128, TypeExpandFloat); + } + + // Decide how to handle f128. If the target does not have native f128 support, + // expand it to i128 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f128)) { + NumRegistersForVT[MVT::f128] = NumRegistersForVT[MVT::i128]; + RegisterTypeForVT[MVT::f128] = RegisterTypeForVT[MVT::i128]; + TransformToType[MVT::f128] = MVT::i128; + ValueTypeActions.setTypeAction(MVT::f128, TypeSoftenFloat); + } + + // Decide how to handle f64. If the target does not have native f64 support, + // expand it to i64 and we will be generating soft float library calls. + if (!isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f64] = NumRegistersForVT[MVT::i64]; + RegisterTypeForVT[MVT::f64] = RegisterTypeForVT[MVT::i64]; + TransformToType[MVT::f64] = MVT::i64; + ValueTypeActions.setTypeAction(MVT::f64, TypeSoftenFloat); + } + + // Decide how to handle f32. If the target does not have native support for + // f32, promote it to f64 if it is legal. Otherwise, expand it to i32. + if (!isTypeLegal(MVT::f32)) { + if (isTypeLegal(MVT::f64)) { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::f64]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::f64]; + TransformToType[MVT::f32] = MVT::f64; + ValueTypeActions.setTypeAction(MVT::f32, TypePromoteInteger); + } else { + NumRegistersForVT[MVT::f32] = NumRegistersForVT[MVT::i32]; + RegisterTypeForVT[MVT::f32] = RegisterTypeForVT[MVT::i32]; + TransformToType[MVT::f32] = MVT::i32; + ValueTypeActions.setTypeAction(MVT::f32, TypeSoftenFloat); + } + } + + // Loop over all of the vector value types to see which need transformations. + for (unsigned i = MVT::FIRST_VECTOR_VALUETYPE; + i <= (unsigned)MVT::LAST_VECTOR_VALUETYPE; ++i) { + MVT VT = (MVT::SimpleValueType)i; + if (isTypeLegal(VT)) continue; + + // Determine if there is a legal wider type. If so, we should promote to + // that wider vector type. + MVT EltVT = VT.getVectorElementType(); + unsigned NElts = VT.getVectorNumElements(); + if (NElts != 1 && !shouldSplitVectorElementType(EltVT)) { + bool IsLegalWiderType = false; + // First try to promote the elements of integer vectors. If no legal + // promotion was found, fallback to the widen-vector method. 
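// Illustrative trace, not part of the imported patch: for VT = v4i1 the first
// loop below searches for a legal vector type with the same element count but
// wider integer elements (e.g. v4i32); if one exists, v4i1 is marked
// TypePromoteInteger and carried in a single register of that wider type.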
+ for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + // Promote vectors of integers to vectors with the same number + // of elements, with a wider element type. + if (SVT.getVectorElementType().getSizeInBits() > EltVT.getSizeInBits() + && SVT.getVectorNumElements() == NElts && + isTypeLegal(SVT) && SVT.getScalarType().isInteger()) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypePromoteInteger); + IsLegalWiderType = true; + break; + } + } + + if (IsLegalWiderType) continue; + + // Try to widen the vector. + for (unsigned nVT = i+1; nVT <= MVT::LAST_VECTOR_VALUETYPE; ++nVT) { + MVT SVT = (MVT::SimpleValueType)nVT; + if (SVT.getVectorElementType() == EltVT && + SVT.getVectorNumElements() > NElts && + isTypeLegal(SVT)) { + TransformToType[i] = SVT; + RegisterTypeForVT[i] = SVT; + NumRegistersForVT[i] = 1; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + IsLegalWiderType = true; + break; + } + } + if (IsLegalWiderType) continue; + } + + MVT IntermediateVT; + MVT RegisterVT; + unsigned NumIntermediates; + NumRegistersForVT[i] = + getVectorTypeBreakdownMVT(VT, IntermediateVT, NumIntermediates, + RegisterVT, this); + RegisterTypeForVT[i] = RegisterVT; + + MVT NVT = VT.getPow2VectorType(); + if (NVT == VT) { + // Type is already a power of 2. The default action is to split. + TransformToType[i] = MVT::Other; + unsigned NumElts = VT.getVectorNumElements(); + ValueTypeActions.setTypeAction(VT, + NumElts > 1 ? TypeSplitVector : TypeScalarizeVector); + } else { + TransformToType[i] = NVT; + ValueTypeActions.setTypeAction(VT, TypeWidenVector); + } + } + + // Determine the 'representative' register class for each value type. + // An representative register class is the largest (meaning one which is + // not a sub-register class / subreg register class) legal register class for + // a group of value types. For example, on i386, i8, i16, and i32 + // representative would be GR32; while on x86_64 it's GR64. + for (unsigned i = 0; i != MVT::LAST_VALUETYPE; ++i) { + const TargetRegisterClass* RRC; + uint8_t Cost; + tie(RRC, Cost) = findRepresentativeClass((MVT::SimpleValueType)i); + RepRegClassForVT[i] = RRC; + RepRegClassCostForVT[i] = Cost; + } +} + +EVT TargetLoweringBase::getSetCCResultType(EVT VT) const { + assert(!VT.isVector() && "No default SetCC type for vectors!"); + return getPointerTy(0).SimpleTy; +} + +MVT::SimpleValueType TargetLoweringBase::getCmpLibcallReturnType() const { + return MVT::i32; // return the default value +} + +/// getVectorTypeBreakdown - Vector types are broken down into some number of +/// legal first class types. For example, MVT::v8f32 maps to 2 MVT::v4f32 +/// with Altivec or SSE1, or 8 promoted MVT::f64 values with the X86 FP stack. +/// Similarly, MVT::v2i64 turns into 4 MVT::i32 values with both PPC and X86. +/// +/// This method returns the number of registers needed, and the VT for each +/// register. It also returns the VT and quantity of the intermediate values +/// before they are promoted/expanded. 
+/// +unsigned TargetLoweringBase::getVectorTypeBreakdown(LLVMContext &Context, EVT VT, + EVT &IntermediateVT, + unsigned &NumIntermediates, + MVT &RegisterVT) const { + unsigned NumElts = VT.getVectorNumElements(); + + // If there is a wider vector type with the same element type as this one, + // or a promoted vector type that has the same number of elements which + // are wider, then we should convert to that legal vector type. + // This handles things like <2 x float> -> <4 x float> and + // <4 x i1> -> <4 x i32>. + LegalizeTypeAction TA = getTypeAction(Context, VT); + if (NumElts != 1 && (TA == TypeWidenVector || TA == TypePromoteInteger)) { + EVT RegisterEVT = getTypeToTransformTo(Context, VT); + if (isTypeLegal(RegisterEVT)) { + IntermediateVT = RegisterEVT; + RegisterVT = RegisterEVT.getSimpleVT(); + NumIntermediates = 1; + return 1; + } + } + + // Figure out the right, legal destination reg to copy into. + EVT EltTy = VT.getVectorElementType(); + + unsigned NumVectorRegs = 1; + + // FIXME: We don't support non-power-of-2-sized vectors for now. Ideally we + // could break down into LHS/RHS like LegalizeDAG does. + if (!isPowerOf2_32(NumElts)) { + NumVectorRegs = NumElts; + NumElts = 1; + } + + // Divide the input until we get to a supported size. This will always + // end with a scalar if the target doesn't support vectors. + while (NumElts > 1 && !isTypeLegal( + EVT::getVectorVT(Context, EltTy, NumElts))) { + NumElts >>= 1; + NumVectorRegs <<= 1; + } + + NumIntermediates = NumVectorRegs; + + EVT NewVT = EVT::getVectorVT(Context, EltTy, NumElts); + if (!isTypeLegal(NewVT)) + NewVT = EltTy; + IntermediateVT = NewVT; + + MVT DestVT = getRegisterType(Context, NewVT); + RegisterVT = DestVT; + unsigned NewVTSize = NewVT.getSizeInBits(); + + // Convert sizes such as i33 to i64. + if (!isPowerOf2_32(NewVTSize)) + NewVTSize = NextPowerOf2(NewVTSize); + + if (EVT(DestVT).bitsLT(NewVT)) // Value is expanded, e.g. i64 -> i16. + return NumVectorRegs*(NewVTSize/DestVT.getSizeInBits()); + + // Otherwise, promotion or legal types use the same number of registers as + // the vector decimated to the appropriate level. + return NumVectorRegs; +} + +/// Get the EVTs and ArgFlags collections that represent the legalized return +/// type of the given function. This does not require a DAG or a return value, +/// and is suitable for use before any DAGs for the function are constructed. +/// TODO: Move this out of TargetLowering.cpp. +void llvm::GetReturnInfo(Type* ReturnType, AttributeSet attr, + SmallVectorImpl &Outs, + const TargetLowering &TLI) { + SmallVector ValueVTs; + ComputeValueVTs(TLI, ReturnType, ValueVTs); + unsigned NumValues = ValueVTs.size(); + if (NumValues == 0) return; + + for (unsigned j = 0, f = NumValues; j != f; ++j) { + EVT VT = ValueVTs[j]; + ISD::NodeType ExtendKind = ISD::ANY_EXTEND; + + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + ExtendKind = ISD::SIGN_EXTEND; + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + ExtendKind = ISD::ZERO_EXTEND; + + // FIXME: C calling convention requires the return type to be promoted to + // at least 32-bit. But this is not necessary for non-C calling + // conventions. The frontend should mark functions whose return values + // require promoting with signext or zeroext attributes. 
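// Illustrative trace, not part of the imported patch: a function returning
// 'signext i8' reaches this point with VT = i8 and ExtendKind = SIGN_EXTEND;
// the check below raises VT to the register type chosen for i32, so on a
// typical 32-bit-register target the value is returned as one sign-extended
// 32-bit part.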
+ if (ExtendKind != ISD::ANY_EXTEND && VT.isInteger()) { + MVT MinVT = TLI.getRegisterType(ReturnType->getContext(), MVT::i32); + if (VT.bitsLT(MinVT)) + VT = MinVT; + } + + unsigned NumParts = TLI.getNumRegisters(ReturnType->getContext(), VT); + MVT PartVT = TLI.getRegisterType(ReturnType->getContext(), VT); + + // 'inreg' on function refers to return value + ISD::ArgFlagsTy Flags = ISD::ArgFlagsTy(); + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::InReg)) + Flags.setInReg(); + + // Propagate extension type if any + if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::SExt)) + Flags.setSExt(); + else if (attr.hasAttribute(AttributeSet::ReturnIndex, Attribute::ZExt)) + Flags.setZExt(); + + for (unsigned i = 0; i < NumParts; ++i) + Outs.push_back(ISD::OutputArg(Flags, PartVT, /*isFixed=*/true, 0, 0)); + } +} + +/// getByValTypeAlignment - Return the desired alignment for ByVal aggregate +/// function arguments in the caller parameter area. This is the actual +/// alignment, not its logarithm. +unsigned TargetLoweringBase::getByValTypeAlignment(Type *Ty) const { + return TD->getCallFrameTypeAlignment(Ty); +} + +//===----------------------------------------------------------------------===// +// TargetTransformInfo Helpers +//===----------------------------------------------------------------------===// + +int TargetLoweringBase::InstructionOpcodeToISD(unsigned Opcode) const { + enum InstructionOpcodes { +#define HANDLE_INST(NUM, OPCODE, CLASS) OPCODE = NUM, +#define LAST_OTHER_INST(NUM) InstructionOpcodesCount = NUM +#include "llvm/IR/Instruction.def" + }; + switch (static_cast(Opcode)) { + case Ret: return 0; + case Br: return 0; + case Switch: return 0; + case IndirectBr: return 0; + case Invoke: return 0; + case Resume: return 0; + case Unreachable: return 0; + case Add: return ISD::ADD; + case FAdd: return ISD::FADD; + case Sub: return ISD::SUB; + case FSub: return ISD::FSUB; + case Mul: return ISD::MUL; + case FMul: return ISD::FMUL; + case UDiv: return ISD::UDIV; + case SDiv: return ISD::UDIV; + case FDiv: return ISD::FDIV; + case URem: return ISD::UREM; + case SRem: return ISD::SREM; + case FRem: return ISD::FREM; + case Shl: return ISD::SHL; + case LShr: return ISD::SRL; + case AShr: return ISD::SRA; + case And: return ISD::AND; + case Or: return ISD::OR; + case Xor: return ISD::XOR; + case Alloca: return 0; + case Load: return ISD::LOAD; + case Store: return ISD::STORE; + case GetElementPtr: return 0; + case Fence: return 0; + case AtomicCmpXchg: return 0; + case AtomicRMW: return 0; + case Trunc: return ISD::TRUNCATE; + case ZExt: return ISD::ZERO_EXTEND; + case SExt: return ISD::SIGN_EXTEND; + case FPToUI: return ISD::FP_TO_UINT; + case FPToSI: return ISD::FP_TO_SINT; + case UIToFP: return ISD::UINT_TO_FP; + case SIToFP: return ISD::SINT_TO_FP; + case FPTrunc: return ISD::FP_ROUND; + case FPExt: return ISD::FP_EXTEND; + case PtrToInt: return ISD::BITCAST; + case IntToPtr: return ISD::BITCAST; + case BitCast: return ISD::BITCAST; + case ICmp: return ISD::SETCC; + case FCmp: return ISD::SETCC; + case PHI: return 0; + case Call: return 0; + case Select: return ISD::SELECT; + case UserOp1: return 0; + case UserOp2: return 0; + case VAArg: return 0; + case ExtractElement: return ISD::EXTRACT_VECTOR_ELT; + case InsertElement: return ISD::INSERT_VECTOR_ELT; + case ShuffleVector: return ISD::VECTOR_SHUFFLE; + case ExtractValue: return ISD::MERGE_VALUES; + case InsertValue: return ISD::MERGE_VALUES; + case LandingPad: return 0; + } + + llvm_unreachable("Unknown 
instruction type encountered!");
+}
+
+std::pair<unsigned, MVT>
+TargetLoweringBase::getTypeLegalizationCost(Type *Ty) const {
+  LLVMContext &C = Ty->getContext();
+  EVT MTy = getValueType(Ty);
+
+  unsigned Cost = 1;
+  // We keep legalizing the type until we find a legal kind. We assume that
+  // the only operation that costs anything is the split. After splitting
+  // we need to handle two types.
+  while (true) {
+    LegalizeKind LK = getTypeConversion(C, MTy);
+
+    if (LK.first == TypeLegal)
+      return std::make_pair(Cost, MTy.getSimpleVT());
+
+    if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger)
+      Cost *= 2;
+
+    // Keep legalizing the type.
+    MTy = LK.second;
+  }
+}
+
+//===----------------------------------------------------------------------===//
+//  Loop Strength Reduction hooks
+//===----------------------------------------------------------------------===//
+
+/// isLegalAddressingMode - Return true if the addressing mode represented
+/// by AM is legal for this target, for a load/store of the specified type.
+bool TargetLoweringBase::isLegalAddressingMode(const AddrMode &AM,
+                                               Type *Ty) const {
+  // The default implementation of this implements a conservative RISCy, r+r and
+  // r+i addr mode.
+
+  // Allows a sign-extended 16-bit immediate field.
+  if (AM.BaseOffs <= -(1LL << 16) || AM.BaseOffs >= (1LL << 16)-1)
+    return false;
+
+  // No global is ever allowed as a base.
+  if (AM.BaseGV)
+    return false;
+
+  // Only support r+r,
+  switch (AM.Scale) {
+  case 0:  // "r+i" or just "i", depending on HasBaseReg.
+    break;
+  case 1:
+    if (AM.HasBaseReg && AM.BaseOffs)  // "r+r+i" is not allowed.
+      return false;
+    // Otherwise we have r+r or r+i.
+    break;
+  case 2:
+    if (AM.HasBaseReg || AM.BaseOffs)  // 2*r+r or 2*r+i is not allowed.
+      return false;
+    // Allow 2*r as r+r.
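// Illustrative examples, not part of the imported patch, of what this default
// policy accepts and rejects:
//   reg + 42       -> accepted  (Scale 0, offset fits in the 16-bit field)
//   reg + reg      -> accepted  (Scale 1, no offset)
//   reg + reg + 8  -> rejected  (Scale 1 with both a base reg and an offset)
//   2*reg          -> accepted  (treated as reg + reg)
//   2*reg + 8      -> rejected  (Scale 2 with an offset)
//   global + 8     -> rejected  (no global base is allowed)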
+ break; + } + + return true; +} diff --git a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp index 8f5d770f6651..3bdca4c64078 100644 --- a/lib/CodeGen/TargetLoweringObjectFileImpl.cpp +++ b/lib/CodeGen/TargetLoweringObjectFileImpl.cpp @@ -13,30 +13,29 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Module.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/ADT/Triple.h" #include "llvm/CodeGen/MachineModuleInfoImpls.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Module.h" #include "llvm/MC/MCContext.h" #include "llvm/MC/MCExpr.h" -#include "llvm/MC/MCSectionMachO.h" -#include "llvm/MC/MCSectionELF.h" #include "llvm/MC/MCSectionCOFF.h" +#include "llvm/MC/MCSectionELF.h" +#include "llvm/MC/MCSectionMachO.h" #include "llvm/MC/MCStreamer.h" #include "llvm/MC/MCSymbol.h" -#include "llvm/Target/Mangler.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ELF.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/StringExtras.h" -#include "llvm/ADT/Triple.h" +#include "llvm/Target/Mangler.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; using namespace dwarf; @@ -88,6 +87,36 @@ void TargetLoweringObjectFileELF::emitPersonalityValue(MCStreamer &Streamer, Streamer.EmitSymbolValue(Sym, Size); } +const MCExpr *TargetLoweringObjectFileELF:: +getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const { + + if (Encoding & dwarf::DW_EH_PE_indirect) { + MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo(); + + SmallString<128> Name; + Mang->getNameWithPrefix(Name, GV, true); + Name += ".DW.stub"; + + // Add information about the stub reference to ELFMMI so that the stub + // gets emitted by the asmprinter. + MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); + MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); + if (StubSym.getPointer() == 0) { + MCSymbol *Sym = Mang->getSymbol(GV); + StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); + } + + return TargetLoweringObjectFile:: + getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()), + Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + } + + return TargetLoweringObjectFile:: + getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer); +} + static SectionKind getELFKindForNamedSection(StringRef Name, SectionKind K) { // N.B.: The defaults used in here are no the same ones used in MC. 
@@ -314,35 +343,6 @@ getSectionForConstant(SectionKind Kind) const { return DataRelROSection; } -const MCExpr *TargetLoweringObjectFileELF:: -getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, - unsigned Encoding, MCStreamer &Streamer) const { - - if (Encoding & dwarf::DW_EH_PE_indirect) { - MachineModuleInfoELF &ELFMMI = MMI->getObjFileInfo(); - - SmallString<128> Name; - Mang->getNameWithPrefix(Name, GV, true); - Name += ".DW.stub"; - - // Add information about the stub reference to ELFMMI so that the stub - // gets emitted by the asmprinter. - MCSymbol *SSym = getContext().GetOrCreateSymbol(Name.str()); - MachineModuleInfoImpl::StubValueTy &StubSym = ELFMMI.getGVStubEntry(SSym); - if (StubSym.getPointer() == 0) { - MCSymbol *Sym = Mang->getSymbol(GV); - StubSym = MachineModuleInfoImpl::StubValueTy(Sym, !GV->hasLocalLinkage()); - } - - return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); - } - - return TargetLoweringObjectFile:: - getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); -} - const MCSection * TargetLoweringObjectFileELF::getStaticCtorSection(unsigned Priority) const { // The default scheme is .ctor / .dtor, so we have to invert the priority @@ -405,14 +405,14 @@ TargetLoweringObjectFileELF::InitializeELF(bool UseInitArray_) { // MachO //===----------------------------------------------------------------------===// -/// emitModuleFlags - Emit the module flags that specify the garbage collection -/// information. +/// emitModuleFlags - Perform code emission for module flags. void TargetLoweringObjectFileMachO:: emitModuleFlags(MCStreamer &Streamer, ArrayRef ModuleFlags, Mangler *Mang, const TargetMachine &TM) const { unsigned VersionVal = 0; unsigned ImageInfoFlags = 0; + MDNode *LinkerOptions = 0; StringRef SectionVal; for (ArrayRef::iterator @@ -426,14 +426,33 @@ emitModuleFlags(MCStreamer &Streamer, StringRef Key = MFE.Key->getString(); Value *Val = MFE.Val; - if (Key == "Objective-C Image Info Version") + if (Key == "Objective-C Image Info Version") { VersionVal = cast(Val)->getZExtValue(); - else if (Key == "Objective-C Garbage Collection" || - Key == "Objective-C GC Only" || - Key == "Objective-C Is Simulated") + } else if (Key == "Objective-C Garbage Collection" || + Key == "Objective-C GC Only" || + Key == "Objective-C Is Simulated") { ImageInfoFlags |= cast(Val)->getZExtValue(); - else if (Key == "Objective-C Image Info Section") + } else if (Key == "Objective-C Image Info Section") { SectionVal = cast(Val)->getString(); + } else if (Key == "Linker Options") { + LinkerOptions = cast(Val); + } + } + + // Emit the linker options if present. + if (LinkerOptions) { + for (unsigned i = 0, e = LinkerOptions->getNumOperands(); i != e; ++i) { + MDNode *MDOptions = cast(LinkerOptions->getOperand(i)); + SmallVector StrOptions; + + // Convert to strings. + for (unsigned ii = 0, ie = MDOptions->getNumOperands(); ii != ie; ++ii) { + MDString *MDOption = cast(MDOptions->getOperand(ii)); + StrOptions.push_back(MDOption->getString()); + } + + Streamer.EmitLinkerOptions(StrOptions); + } } // The section is mandatory. If we don't have it, then we don't have GC info. 
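// Illustrative aside, not part of the imported patch: a "Linker Options"
// module flag whose value is a list of string lists, for example
//   ("-framework", "Cocoa") and ("-lz"),
// makes the loop above call Streamer.EmitLinkerOptions() once per inner list,
// so each option group is forwarded to the Mach-O streamer and carried through
// to the emitted object file.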
@@ -604,9 +623,9 @@ shouldEmitUsedDirectiveFor(const GlobalValue *GV, Mangler *Mang) const { } const MCExpr *TargetLoweringObjectFileMachO:: -getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, - MachineModuleInfo *MMI, unsigned Encoding, - MCStreamer &Streamer) const { +getTTypeGlobalReference(const GlobalValue *GV, Mangler *Mang, + MachineModuleInfo *MMI, unsigned Encoding, + MCStreamer &Streamer) const { // The mach-o version of this method defaults to returning a stub reference. if (Encoding & DW_EH_PE_indirect) { @@ -629,11 +648,12 @@ getExprForDwarfGlobalReference(const GlobalValue *GV, Mangler *Mang, } return TargetLoweringObjectFile:: - getExprForDwarfReference(SSym, Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); + getTTypeReference(MCSymbolRefExpr::Create(SSym, getContext()), + Encoding & ~dwarf::DW_EH_PE_indirect, Streamer); } return TargetLoweringObjectFile:: - getExprForDwarfGlobalReference(GV, Mang, MMI, Encoding, Streamer); + getTTypeGlobalReference(GV, Mang, MMI, Encoding, Streamer); } MCSymbol *TargetLoweringObjectFileMachO:: @@ -701,8 +721,19 @@ getCOFFSectionFlags(SectionKind K) { const MCSection *TargetLoweringObjectFileCOFF:: getExplicitSectionGlobal(const GlobalValue *GV, SectionKind Kind, Mangler *Mang, const TargetMachine &TM) const { - return getContext().getCOFFSection(GV->getSection(), - getCOFFSectionFlags(Kind), + int Selection = 0; + unsigned Characteristics = getCOFFSectionFlags(Kind); + SmallString<128> Name(GV->getSection().c_str()); + if (GV->isWeakForLinker()) { + Selection = COFF::IMAGE_COMDAT_SELECT_ANY; + Characteristics |= COFF::IMAGE_SCN_LNK_COMDAT; + MCSymbol *Sym = Mang->getSymbol(GV); + Name.append("$"); + Name.append(Sym->getName().begin() + 1, Sym->getName().end()); + } + return getContext().getCOFFSection(Name, + Characteristics, + Selection, Kind); } @@ -711,8 +742,11 @@ static const char *getCOFFSectionPrefixForUniqueGlobal(SectionKind Kind) { return ".text$"; if (Kind.isBSS ()) return ".bss$"; - if (Kind.isThreadLocal()) - return ".tls$"; + if (Kind.isThreadLocal()) { + // 'LLVM' is just an arbitary string to ensure that the section name gets + // sorted in between '.tls$AAA' and '.tls$ZZZ' by the linker. + return ".tls$LLVM"; + } if (Kind.isWriteable()) return ".data$"; return ".rdata$"; diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp new file mode 100644 index 000000000000..84b4bfc33221 --- /dev/null +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -0,0 +1,285 @@ +//===- TargetRegisterInfo.cpp - Target Register Information Implementation ===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the TargetRegisterInfo interface. 
+// +//===----------------------------------------------------------------------===// + +#include "llvm/Target/TargetRegisterInfo.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/CodeGen/MachineFunction.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/VirtRegMap.h" +#include "llvm/Support/raw_ostream.h" + +using namespace llvm; + +TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, + regclass_iterator RCB, regclass_iterator RCE, + const char *const *SRINames, + const unsigned *SRILaneMasks) + : InfoDesc(ID), SubRegIndexNames(SRINames), + SubRegIndexLaneMasks(SRILaneMasks), + RegClassBegin(RCB), RegClassEnd(RCE) { +} + +TargetRegisterInfo::~TargetRegisterInfo() {} + +void PrintReg::print(raw_ostream &OS) const { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); + else + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } +} + +void PrintRegUnit::print(raw_ostream &OS) const { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } + + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } + + // Normal units have at least one root. + MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); +} + +/// getAllocatableClass - Return the maximal subclass of the given register +/// class that is alloctable, or NULL. +const TargetRegisterClass * +TargetRegisterInfo::getAllocatableClass(const TargetRegisterClass *RC) const { + if (!RC || RC->isAllocatable()) + return RC; + + const unsigned *SubClass = RC->getSubClassMask(); + for (unsigned Base = 0, BaseE = getNumRegClasses(); + Base < BaseE; Base += 32) { + unsigned Idx = Base; + for (unsigned Mask = *SubClass++; Mask; Mask >>= 1) { + unsigned Offset = CountTrailingZeros_32(Mask); + const TargetRegisterClass *SubRC = getRegClass(Idx + Offset); + if (SubRC->isAllocatable()) + return SubRC; + Mask >>= Offset; + Idx += Offset + 1; + } + } + return NULL; +} + +/// getMinimalPhysRegClass - Returns the Register Class of a physical +/// register of the given type, picking the most sub register class of +/// the right type that contains this physreg. +const TargetRegisterClass * +TargetRegisterInfo::getMinimalPhysRegClass(unsigned reg, EVT VT) const { + assert(isPhysicalRegister(reg) && "reg must be a physical register"); + + // Pick the most sub register class of the right type that contains + // this physreg. + const TargetRegisterClass* BestRC = 0; + for (regclass_iterator I = regclass_begin(), E = regclass_end(); I != E; ++I){ + const TargetRegisterClass* RC = *I; + if ((VT == MVT::Other || RC->hasType(VT)) && RC->contains(reg) && + (!BestRC || BestRC->hasSubClass(RC))) + BestRC = RC; + } + + assert(BestRC && "Couldn't find the register class"); + return BestRC; +} + +/// getAllocatableSetForRC - Toggle the bits that represent allocatable +/// registers for the specific register class. 
+static void getAllocatableSetForRC(const MachineFunction &MF, + const TargetRegisterClass *RC, BitVector &R){ + assert(RC->isAllocatable() && "invalid for nonallocatable sets"); + ArrayRef Order = RC->getRawAllocationOrder(MF); + for (unsigned i = 0; i != Order.size(); ++i) + R.set(Order[i]); +} + +BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, + const TargetRegisterClass *RC) const { + BitVector Allocatable(getNumRegs()); + if (RC) { + // A register class with no allocatable subclass returns an empty set. + const TargetRegisterClass *SubClass = getAllocatableClass(RC); + if (SubClass) + getAllocatableSetForRC(MF, SubClass, Allocatable); + } else { + for (TargetRegisterInfo::regclass_iterator I = regclass_begin(), + E = regclass_end(); I != E; ++I) + if ((*I)->isAllocatable()) + getAllocatableSetForRC(MF, *I, Allocatable); + } + + // Mask out the reserved registers + BitVector Reserved = getReservedRegs(MF); + Allocatable &= Reserved.flip(); + + return Allocatable; +} + +static inline +const TargetRegisterClass *firstCommonClass(const uint32_t *A, + const uint32_t *B, + const TargetRegisterInfo *TRI) { + for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) + if (unsigned Common = *A++ & *B++) + return TRI->getRegClass(I + CountTrailingZeros_32(Common)); + return 0; +} + +const TargetRegisterClass * +TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, + const TargetRegisterClass *B) const { + // First take care of the trivial cases. + if (A == B) + return A; + if (!A || !B) + return 0; + + // Register classes are ordered topologically, so the largest common + // sub-class it the common sub-class with the smallest ID. + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); +} + +const TargetRegisterClass * +TargetRegisterInfo::getMatchingSuperRegClass(const TargetRegisterClass *A, + const TargetRegisterClass *B, + unsigned Idx) const { + assert(A && B && "Missing register class"); + assert(Idx && "Bad sub-register index"); + + // Find Idx in the list of super-register indices. + for (SuperRegClassIterator RCI(B, this); RCI.isValid(); ++RCI) + if (RCI.getSubReg() == Idx) + // The bit mask contains all register classes that are projected into B + // by Idx. Find a class that is also a sub-class of A. + return firstCommonClass(RCI.getMask(), A->getSubClassMask(), this); + return 0; +} + +const TargetRegisterClass *TargetRegisterInfo:: +getCommonSuperRegClass(const TargetRegisterClass *RCA, unsigned SubA, + const TargetRegisterClass *RCB, unsigned SubB, + unsigned &PreA, unsigned &PreB) const { + assert(RCA && SubA && RCB && SubB && "Invalid arguments"); + + // Search all pairs of sub-register indices that project into RCA and RCB + // respectively. This is quadratic, but usually the sets are very small. On + // most targets like X86, there will only be a single sub-register index + // (e.g., sub_16bit projecting into GR16). + // + // The worst case is a register class like DPR on ARM. + // We have indices dsub_0..dsub_7 projecting into that class. + // + // It is very common that one register class is a sub-register of the other. + // Arrange for RCA to be the larger register so the answer will be found in + // the first iteration. This makes the search linear for the most common + // case. 
+ const TargetRegisterClass *BestRC = 0; + unsigned *BestPreA = &PreA; + unsigned *BestPreB = &PreB; + if (RCA->getSize() < RCB->getSize()) { + std::swap(RCA, RCB); + std::swap(SubA, SubB); + std::swap(BestPreA, BestPreB); + } + + // Also terminate the search one we have found a register class as small as + // RCA. + unsigned MinSize = RCA->getSize(); + + for (SuperRegClassIterator IA(RCA, this, true); IA.isValid(); ++IA) { + unsigned FinalA = composeSubRegIndices(IA.getSubReg(), SubA); + for (SuperRegClassIterator IB(RCB, this, true); IB.isValid(); ++IB) { + // Check if a common super-register class exists for this index pair. + const TargetRegisterClass *RC = + firstCommonClass(IA.getMask(), IB.getMask(), this); + if (!RC || RC->getSize() < MinSize) + continue; + + // The indexes must compose identically: PreA+SubA == PreB+SubB. + unsigned FinalB = composeSubRegIndices(IB.getSubReg(), SubB); + if (FinalA != FinalB) + continue; + + // Is RC a better candidate than BestRC? + if (BestRC && RC->getSize() >= BestRC->getSize()) + continue; + + // Yes, RC is the smallest super-register seen so far. + BestRC = RC; + *BestPreA = IA.getSubReg(); + *BestPreB = IB.getSubReg(); + + // Bail early if we reached MinSize. We won't find a better candidate. + if (BestRC->getSize() == MinSize) + return BestRC; + } + } + return BestRC; +} + +// Compute target-independent register allocator hints to help eliminate copies. +void +TargetRegisterInfo::getRegAllocationHints(unsigned VirtReg, + ArrayRef Order, + SmallVectorImpl &Hints, + const MachineFunction &MF, + const VirtRegMap *VRM) const { + const MachineRegisterInfo &MRI = MF.getRegInfo(); + std::pair Hint = MRI.getRegAllocationHint(VirtReg); + + // Hints with HintType != 0 were set by target-dependent code. + // Such targets must provide their own implementation of + // TRI::getRegAllocationHints to interpret those hint types. + assert(Hint.first == 0 && "Target must implement TRI::getRegAllocationHints"); + + // Target-independent hints are either a physical or a virtual register. + unsigned Phys = Hint.second; + if (VRM && isVirtualRegister(Phys)) + Phys = VRM->getPhys(Phys); + + // Check that Phys is a valid hint in VirtReg's register class. + if (!isPhysicalRegister(Phys)) + return; + if (MRI.isReserved(Phys)) + return; + // Check that Phys is in the allocation order. We shouldn't heed hints + // from VirtReg's register class if they aren't in the allocation order. The + // target probably has a reason for removing the register. + if (std::find(Order.begin(), Order.end(), Phys) == Order.end()) + return; + + // All clear, tell the register allocator to prefer this register. + Hints.push_back(Phys); +} diff --git a/lib/CodeGen/TargetSchedule.cpp b/lib/CodeGen/TargetSchedule.cpp index ca3b0e0b1173..783bfa1c1a18 100644 --- a/lib/CodeGen/TargetSchedule.cpp +++ b/lib/CodeGen/TargetSchedule.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/TargetSchedule.h" +#include "llvm/Support/CommandLine.h" +#include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" -#include "llvm/Support/CommandLine.h" -#include "llvm/Support/raw_ostream.h" using namespace llvm; @@ -240,7 +240,10 @@ unsigned TargetSchedModel::computeOperandLatency( report_fatal_error(ss.str()); } #endif - return DefMI->isTransient() ? 
0 : 1; + // FIXME: Automatically giving all implicit defs defaultDefLatency is + // undesirable. We should only do it for defs that are known to the MC + // desc like flags. Truly implicit defs should get 1 cycle latency. + return DefMI->isTransient() ? 0 : TII->defaultDefLatency(&SchedModel, DefMI); } unsigned TargetSchedModel::computeInstrLatency(const MachineInstr *MI) const { diff --git a/lib/CodeGen/TwoAddressInstructionPass.cpp b/lib/CodeGen/TwoAddressInstructionPass.cpp index a9058bc7f6d9..e6dfe104c82f 100644 --- a/lib/CodeGen/TwoAddressInstructionPass.cpp +++ b/lib/CodeGen/TwoAddressInstructionPass.cpp @@ -29,26 +29,25 @@ #define DEBUG_TYPE "twoaddrinstr" #include "llvm/CodeGen/Passes.h" -#include "llvm/Function.h" +#include "llvm/ADT/BitVector.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallSet.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/AliasAnalysis.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveVariables.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineInstr.h" #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" -#include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCInstrItineraries.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/ADT/BitVector.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallSet.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; STATISTIC(NumTwoAddressInstrs, "Number of two-address instructions"); @@ -67,7 +66,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { const InstrItineraryData *InstrItins; MachineRegisterInfo *MRI; LiveVariables *LV; - SlotIndexes *Indexes; LiveIntervals *LIS; AliasAnalysis *AA; CodeGenOpt::Level OptLevel; @@ -92,10 +90,6 @@ class TwoAddressInstructionPass : public MachineFunctionPass { // virtual registers. e.g. r1 = move v1024. DenseMap DstRegMap; - /// RegSequences - Keep track the list of REG_SEQUENCE instructions seen - /// during the initial walk of the machine function. - SmallVector RegSequences; - bool sink3AddrInstruction(MachineInstr *MI, unsigned Reg, MachineBasicBlock::iterator OldPos); @@ -125,7 +119,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { bool tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned SrcIdx, unsigned DstIdx, - unsigned Dist); + unsigned Dist, bool shouldOnlyCommute); void scanUses(unsigned DstReg); @@ -135,11 +129,7 @@ class TwoAddressInstructionPass : public MachineFunctionPass { typedef SmallDenseMap TiedOperandMap; bool collectTiedOperands(MachineInstr *MI, TiedOperandMap&); void processTiedPairs(MachineInstr *MI, TiedPairList&, unsigned &Dist); - - /// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part of - /// the de-ssa process. This replaces sources of REG_SEQUENCE as sub-register - /// references of the register defined by REG_SEQUENCE. 
- bool eliminateRegSequences(); + void eliminateRegSequence(MachineBasicBlock::iterator&); public: static char ID; // Pass identification, replacement for typeid @@ -172,6 +162,8 @@ INITIALIZE_PASS_END(TwoAddressInstructionPass, "twoaddressinstruction", char &llvm::TwoAddressInstructionPassID = TwoAddressInstructionPass::ID; +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, LiveIntervals *LIS); + /// sink3AddrInstruction - A two-address instruction has been converted to a /// three-address instruction to avoid clobbering a register. Try to sink it /// past the instruction that would kill the above mentioned register to reduce @@ -213,14 +205,29 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, // Find the instruction that kills SavedReg. MachineInstr *KillMI = NULL; - for (MachineRegisterInfo::use_nodbg_iterator - UI = MRI->use_nodbg_begin(SavedReg), - UE = MRI->use_nodbg_end(); UI != UE; ++UI) { - MachineOperand &UseMO = UI.getOperand(); - if (!UseMO.isKill()) - continue; - KillMI = UseMO.getParent(); - break; + if (LIS) { + LiveInterval &LI = LIS->getInterval(SavedReg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } + if (!KillMI) { + for (MachineRegisterInfo::use_nodbg_iterator + UI = MRI->use_nodbg_begin(SavedReg), + UE = MRI->use_nodbg_end(); UI != UE; ++UI) { + MachineOperand &UseMO = UI.getOperand(); + if (!UseMO.isKill()) + continue; + KillMI = UseMO.getParent(); + break; + } } // If we find the instruction that kills SavedReg, and it is in an @@ -259,7 +266,7 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, if (DefReg == MOReg) return false; - if (MO.isKill()) { + if (MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS))) { if (OtherMI == KillMI && MOReg == SavedReg) // Save the operand that kills the register. We want to unset the kill // marker if we can sink MI past it. @@ -272,13 +279,15 @@ sink3AddrInstruction(MachineInstr *MI, unsigned SavedReg, } assert(KillMO && "Didn't find kill"); - // Update kill and LV information. - KillMO->setIsKill(false); - KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); - KillMO->setIsKill(true); + if (!LIS) { + // Update kill and LV information. + KillMO->setIsKill(false); + KillMO = MI->findRegisterUseOperand(SavedReg, false, TRI); + KillMO->setIsKill(true); - if (LV) - LV->replaceKillInstruction(SavedReg, KillMI, MI); + if (LV) + LV->replaceKillInstruction(SavedReg, KillMI, MI); + } // Move instruction to its destination. MBB->remove(MI); @@ -339,6 +348,33 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, return true; } +/// isPLainlyKilled - Test if the given register value, which is used by the +// given instruction, is killed by the given instruction. +static bool isPlainlyKilled(MachineInstr *MI, unsigned Reg, + LiveIntervals *LIS) { + if (LIS && TargetRegisterInfo::isVirtualRegister(Reg) && + !LIS->isNotInMIMap(MI)) { + // FIXME: Sometimes tryInstructionTransform() will add instructions and + // test whether they can be folded before keeping them. In this case it + // sets a kill before recursively calling tryInstructionTransform() again. + // If there is no interval available, we assume that this instruction is + // one of those. 
A kill flag is manually inserted on the operand so the + // check below will handle it. + LiveInterval &LI = LIS->getInterval(Reg); + // This is to match the kill flag version where undefs don't have kill + // flags. + if (!LI.hasAtLeastOneValue()) + return false; + + SlotIndex useIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(useIdx); + assert(I != LI.end() && "Reg must be live-in to use."); + return !I->end.isBlock() && SlotIndex::isSameInstr(I->end, useIdx); + } + + return MI->killsRegister(Reg); +} + /// isKilled - Test if the given register value, which is used by the given /// instruction, is killed by the given instruction. This looks through /// coalescable copies to see if the original value is potentially not killed. @@ -354,12 +390,20 @@ static bool isCopyToReg(MachineInstr &MI, const TargetInstrInfo *TII, /// normal heuristics commute the (two-address) add, which lets /// coalescing eliminate the extra copy. /// +/// If allowFalsePositives is true then likely kills are treated as kills even +/// if it can't be proven that they are kills. static bool isKilled(MachineInstr &MI, unsigned Reg, const MachineRegisterInfo *MRI, - const TargetInstrInfo *TII) { + const TargetInstrInfo *TII, + LiveIntervals *LIS, + bool allowFalsePositives) { MachineInstr *DefMI = &MI; for (;;) { - if (!DefMI->killsRegister(Reg)) + // All uses of physical registers are likely to be kills. + if (TargetRegisterInfo::isPhysicalRegister(Reg) && + (allowFalsePositives || MRI->hasOneUse(Reg))) + return true; + if (!isPlainlyKilled(DefMI, Reg, LIS)) return false; if (TargetRegisterInfo::isPhysicalRegister(Reg)) return true; @@ -480,7 +524,7 @@ isProfitableToCommute(unsigned regA, unsigned regB, unsigned regC, // insert => %reg1030 = MOV8rr %reg1029 // %reg1030 = ADD8rr %reg1029, %reg1028, %EFLAGS - if (!MI->killsRegister(regC)) + if (!isPlainlyKilled(MI, regC, LIS)) return false; // Ok, we have something like: @@ -536,19 +580,9 @@ commuteInstruction(MachineBasicBlock::iterator &mi, } DEBUG(dbgs() << "2addr: COMMUTED TO: " << *NewMI); - // If the instruction changed to commute it, update livevar. - if (NewMI != MI) { - if (LV) - // Update live variables - LV->replaceKillInstruction(RegC, MI, NewMI); - if (Indexes) - Indexes->replaceMachineInstrInMaps(MI, NewMI); - - MBB->insert(mi, NewMI); // Insert the new inst - MBB->erase(mi); // Nuke the old inst. - mi = NewMI; - DistanceMap.insert(std::make_pair(NewMI, Dist)); - } + assert(NewMI == MI && + "TargetInstrInfo::commuteInstruction() should not return a new " + "instruction unless it was requested."); // Update source register map. unsigned FromRegC = getMappedReg(RegC, SrcRegMap); @@ -595,8 +629,8 @@ TwoAddressInstructionPass::convertInstTo3Addr(MachineBasicBlock::iterator &mi, DEBUG(dbgs() << "2addr: TO 3-ADDR: " << *NewMI); bool Sunk = false; - if (Indexes) - Indexes->replaceMachineInstrInMaps(mi, NewMI); + if (LIS) + LIS->ReplaceMachineInstrInMaps(mi, NewMI); if (NewMI->findRegisterUseOperand(RegB, false, TRI)) // FIXME: Temporary workaround. If the new instruction doesn't @@ -708,9 +742,9 @@ bool TwoAddressInstructionPass:: rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { - // Bail immediately if we don't have LV available. We use it to find kills - // efficiently. - if (!LV) + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently. 
+ if (!LV && !LIS) return false; MachineInstr *MI = &*mi; @@ -719,7 +753,22 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + MachineInstr *KillMI = 0; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -755,24 +804,27 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, Defs.insert(MOReg); else { Uses.insert(MOReg); - if (MO.isKill() && MOReg != Reg) + if (MOReg != Reg && (MO.isKill() || + (LIS && isPlainlyKilled(MI, MOReg, LIS)))) Kills.insert(MOReg); } } // Move the copies connected to MI down as well. - MachineBasicBlock::iterator From = MI; - MachineBasicBlock::iterator To = llvm::next(From); - while (To->isCopy() && Defs.count(To->getOperand(1).getReg())) { - Defs.insert(To->getOperand(0).getReg()); - ++To; + MachineBasicBlock::iterator Begin = MI; + MachineBasicBlock::iterator AfterMI = llvm::next(Begin); + + MachineBasicBlock::iterator End = AfterMI; + while (End->isCopy() && Defs.count(End->getOperand(1).getReg())) { + Defs.insert(End->getOperand(0).getReg()); + ++End; } // Check if the reschedule will not break depedencies. unsigned NumVisited = 0; MachineBasicBlock::iterator KillPos = KillMI; ++KillPos; - for (MachineBasicBlock::iterator I = To; I != KillPos; ++I) { + for (MachineBasicBlock::iterator I = End; I != KillPos; ++I) { MachineInstr *OtherMI = I; // DBG_VALUE cannot be counted against the limit. if (OtherMI->isDebugValue()) @@ -803,11 +855,13 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, } else { if (Defs.count(MOReg)) return false; + bool isKill = MO.isKill() || + (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)); if (MOReg != Reg && - ((MO.isKill() && Uses.count(MOReg)) || Kills.count(MOReg))) + ((isKill && Uses.count(MOReg)) || Kills.count(MOReg))) // Don't want to extend other live ranges and update kills. return false; - if (MOReg == Reg && !MO.isKill()) + if (MOReg == Reg && !isKill) // We can't schedule across a use of the register in question. return false; // Ensure that if this is register in question, its the kill we expect. @@ -818,19 +872,35 @@ rescheduleMIBelowKill(MachineBasicBlock::iterator &mi, } // Move debug info as well. - while (From != MBB->begin() && llvm::prior(From)->isDebugValue()) - --From; + while (Begin != MBB->begin() && llvm::prior(Begin)->isDebugValue()) + --Begin; + + nmi = End; + MachineBasicBlock::iterator InsertPos = KillPos; + if (LIS) { + // We have to move the copies first so that the MBB is still well-formed + // when calling handleMove(). + for (MachineBasicBlock::iterator MBBI = AfterMI; MBBI != End;) { + MachineInstr *CopyMI = MBBI; + ++MBBI; + MBB->splice(InsertPos, MBB, CopyMI); + LIS->handleMove(CopyMI); + InsertPos = CopyMI; + } + End = llvm::next(MachineBasicBlock::iterator(MI)); + } // Copies following MI may have been moved as well. 
- nmi = To; - MBB->splice(KillPos, MBB, From, To); + MBB->splice(InsertPos, MBB, Begin, End); DistanceMap.erase(DI); // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - if (LIS) + if (LIS) { LIS->handleMove(MI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } DEBUG(dbgs() << "\trescheduled below kill: " << *KillMI); return true; @@ -866,9 +936,9 @@ bool TwoAddressInstructionPass:: rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, unsigned Reg) { - // Bail immediately if we don't have LV available. We use it to find kills - // efficiently. - if (!LV) + // Bail immediately if we don't have LV or LIS available. We use them to find + // kills efficiently. + if (!LV && !LIS) return false; MachineInstr *MI = &*mi; @@ -877,7 +947,22 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, // Must be created from unfolded load. Don't waste time trying this. return false; - MachineInstr *KillMI = LV->getVarInfo(Reg).findKill(MBB); + MachineInstr *KillMI = 0; + if (LIS) { + LiveInterval &LI = LIS->getInterval(Reg); + assert(LI.end() != LI.begin() && + "Reg should not have empty live interval."); + + SlotIndex MBBEndIdx = LIS->getMBBEndIdx(MBB).getPrevSlot(); + LiveInterval::const_iterator I = LI.find(MBBEndIdx); + if (I != LI.end() && I->start < MBBEndIdx) + return false; + + --I; + KillMI = LIS->getInstructionFromIndex(I->end); + } else { + KillMI = LV->getVarInfo(Reg).findKill(MBB); + } if (!KillMI || MI == KillMI || KillMI->isCopy() || KillMI->isCopyLike()) // Don't mess with copies, they may be coalesced later. return false; @@ -904,10 +989,11 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, continue; if (isDefTooClose(MOReg, DI->second, MI)) return false; - if (MOReg == Reg && !MO.isKill()) + bool isKill = MO.isKill() || (LIS && isPlainlyKilled(KillMI, MOReg, LIS)); + if (MOReg == Reg && !isKill) return false; Uses.insert(MOReg); - if (MO.isKill() && MOReg != Reg) + if (isKill && MOReg != Reg) Kills.insert(MOReg); } else if (TargetRegisterInfo::isPhysicalRegister(MOReg)) { Defs.insert(MOReg); @@ -947,7 +1033,8 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, if (Kills.count(MOReg)) // Don't want to extend other live ranges and update kills. return false; - if (OtherMI != MI && MOReg == Reg && !MO.isKill()) + if (OtherMI != MI && MOReg == Reg && + !(MO.isKill() || (LIS && isPlainlyKilled(OtherMI, MOReg, LIS)))) // We can't schedule across a use of the register in question. return false; } else { @@ -981,10 +1068,12 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, DistanceMap.erase(DI); // Update live variables - LV->removeVirtualRegisterKilled(Reg, KillMI); - LV->addVirtualRegisterKilled(Reg, MI); - if (LIS) + if (LIS) { LIS->handleMove(KillMI); + } else { + LV->removeVirtualRegisterKilled(Reg, KillMI); + LV->addVirtualRegisterKilled(Reg, MI); + } DEBUG(dbgs() << "\trescheduled kill: " << *KillMI); return true; @@ -995,11 +1084,13 @@ rescheduleKillAboveMI(MachineBasicBlock::iterator &mi, /// either eliminate the tied operands or improve the opportunities for /// coalescing away the register copy. Returns true if no copy needs to be /// inserted to untie mi's operands (either because they were untied, or -/// because mi was rescheduled, and will be visited again later). +/// because mi was rescheduled, and will be visited again later). 
If the +/// shouldOnlyCommute flag is true, only instruction commutation is attempted. bool TwoAddressInstructionPass:: tryInstructionTransform(MachineBasicBlock::iterator &mi, MachineBasicBlock::iterator &nmi, - unsigned SrcIdx, unsigned DstIdx, unsigned Dist) { + unsigned SrcIdx, unsigned DstIdx, + unsigned Dist, bool shouldOnlyCommute) { if (OptLevel == CodeGenOpt::None) return false; @@ -1009,7 +1100,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, assert(TargetRegisterInfo::isVirtualRegister(regB) && "cannot make instruction into two-address form"); - bool regBKilled = isKilled(MI, regB, MRI, TII); + bool regBKilled = isKilled(MI, regB, MRI, TII, LIS, true); if (TargetRegisterInfo::isVirtualRegister(regA)) scanUses(regA); @@ -1029,7 +1120,7 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, if (regCIdx != ~0U) { regC = MI.getOperand(regCIdx).getReg(); - if (!regBKilled && isKilled(MI, regC, MRI, TII)) + if (!regBKilled && isKilled(MI, regC, MRI, TII, LIS, false)) // If C dies but B does not, swap the B and C operands. // This makes the live ranges of A and C joinable. TryCommute = true; @@ -1048,6 +1139,9 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, return false; } + if (shouldOnlyCommute) + return false; + // If there is one more use of regB later in the same MBB, consider // re-schedule this MI below it. if (rescheduleMIBelowKill(mi, nmi, regB)) { @@ -1123,10 +1217,12 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, unsigned NewDstIdx = NewMIs[1]->findRegisterDefOperandIdx(regA); unsigned NewSrcIdx = NewMIs[1]->findRegisterUseOperandIdx(regB); MachineBasicBlock::iterator NewMI = NewMIs[1]; - bool TransformSuccess = - tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist); - if (TransformSuccess || - NewMIs[1]->getOperand(NewSrcIdx).isKill()) { + bool TransformResult = + tryInstructionTransform(NewMI, mi, NewSrcIdx, NewDstIdx, Dist, true); + (void)TransformResult; + assert(!TransformResult && + "tryInstructionTransform() should return false."); + if (NewMIs[1]->getOperand(NewSrcIdx).isKill()) { // Success, or at least we made an improvement. Keep the unfolded // instructions and discard the original. if (LV) { @@ -1157,10 +1253,26 @@ tryInstructionTransform(MachineBasicBlock::iterator &mi, } LV->addVirtualRegisterKilled(Reg, NewMIs[1]); } + + SmallVector OrigRegs; + if (LIS) { + for (MachineInstr::const_mop_iterator MOI = MI.operands_begin(), + MOE = MI.operands_end(); MOI != MOE; ++MOI) { + if (MOI->isReg()) + OrigRegs.push_back(MOI->getReg()); + } + } + MI.eraseFromParent(); + + // Update LiveIntervals. + if (LIS) { + MachineBasicBlock::iterator Begin(NewMIs[0]); + MachineBasicBlock::iterator End(NewMIs[1]); + LIS->repairIntervalsInRange(MBB, Begin, End, OrigRegs); + } + mi = NewMIs[1]; - if (TransformSuccess) - return true; } else { // Transforming didn't eliminate the tie and didn't lead to an // improvement. 
Clean up the unfolded instructions and keep the @@ -1223,9 +1335,15 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, TiedPairList &TiedPairs, unsigned &Dist) { bool IsEarlyClobber = false; + for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { + const MachineOperand &DstMO = MI->getOperand(TiedPairs[tpi].second); + IsEarlyClobber |= DstMO.isEarlyClobber(); + } + bool RemovedKillFlag = false; bool AllUsesCopied = true; unsigned LastCopiedReg = 0; + SlotIndex LastCopyIdx; unsigned RegB = 0; for (unsigned tpi = 0, tpe = TiedPairs.size(); tpi != tpe; ++tpi) { unsigned SrcIdx = TiedPairs[tpi].first; @@ -1233,7 +1351,6 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, const MachineOperand &DstMO = MI->getOperand(DstIdx); unsigned RegA = DstMO.getReg(); - IsEarlyClobber |= DstMO.isEarlyClobber(); // Grab RegB from the instruction because it may have changed if the // instruction was commuted. @@ -1271,9 +1388,17 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, DistanceMap.insert(std::make_pair(PrevMI, Dist)); DistanceMap[MI] = ++Dist; - SlotIndex CopyIdx; - if (Indexes) - CopyIdx = Indexes->insertMachineInstrInMaps(PrevMI).getRegSlot(); + if (LIS) { + LastCopyIdx = LIS->InsertMachineInstrInMaps(PrevMI).getRegSlot(); + + if (TargetRegisterInfo::isVirtualRegister(RegA)) { + LiveInterval &LI = LIS->getInterval(RegA); + VNInfo *VNI = LI.getNextValue(LastCopyIdx, LIS->getVNInfoAllocator()); + SlotIndex endIdx = + LIS->getInstructionIndex(MI).getRegSlot(IsEarlyClobber); + LI.addRange(LiveRange(LastCopyIdx, endIdx, VNI)); + } + } DEBUG(dbgs() << "\t\tprepend:\t" << *PrevMI); @@ -1319,6 +1444,18 @@ TwoAddressInstructionPass::processTiedPairs(MachineInstr *MI, LV->addVirtualRegisterKilled(RegB, PrevMI); } + // Update LiveIntervals. + if (LIS) { + LiveInterval &LI = LIS->getInterval(RegB); + SlotIndex MIIdx = LIS->getInstructionIndex(MI); + LiveInterval::const_iterator I = LI.find(MIIdx); + assert(I != LI.end() && "RegB must be live-in to use."); + + SlotIndex UseIdx = MIIdx.getRegSlot(IsEarlyClobber); + if (I->end == UseIdx) + LI.removeRange(LastCopyIdx, UseIdx); + } + } else if (RemovedKillFlag) { // Some tied uses of regB matched their destination registers, so // regB is still used in this instruction, but a kill flag was @@ -1343,7 +1480,6 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { TII = TM.getInstrInfo(); TRI = TM.getRegisterInfo(); InstrItins = TM.getInstrItineraryData(); - Indexes = getAnalysisIfAvailable(); LV = getAnalysisIfAvailable(); LIS = getAnalysisIfAvailable(); AA = &getAnalysis(); @@ -1375,9 +1511,10 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { continue; } - // Remember REG_SEQUENCE instructions, we'll deal with them later. + // Expand REG_SEQUENCE instructions. This will position mi at the first + // expanded instruction. if (mi->isRegSequence()) - RegSequences.push_back(&*mi); + eliminateRegSequence(mi); DistanceMap.insert(std::make_pair(mi, ++Dist)); @@ -1406,7 +1543,7 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { unsigned SrcReg = mi->getOperand(SrcIdx).getReg(); unsigned DstReg = mi->getOperand(DstIdx).getReg(); if (SrcReg != DstReg && - tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist)) { + tryInstructionTransform(mi, nmi, SrcIdx, DstIdx, Dist, false)) { // The tied operands have been eliminated or shifted further down the // block to ease elimination. Continue processing with 'nmi'. 
TiedOperands.clear(); @@ -1444,192 +1581,98 @@ bool TwoAddressInstructionPass::runOnMachineFunction(MachineFunction &Func) { } } - // Eliminate REG_SEQUENCE instructions. Their whole purpose was to preseve - // SSA form. It's now safe to de-SSA. - MadeChange |= eliminateRegSequences(); + if (LIS) + MF->verify(this, "After two-address instruction pass"); return MadeChange; } -static void UpdateRegSequenceSrcs(unsigned SrcReg, - unsigned DstReg, unsigned SubIdx, - MachineRegisterInfo *MRI, - const TargetRegisterInfo &TRI) { - for (MachineRegisterInfo::reg_iterator RI = MRI->reg_begin(SrcReg), - RE = MRI->reg_end(); RI != RE; ) { - MachineOperand &MO = RI.getOperand(); - ++RI; - MO.substVirtReg(DstReg, SubIdx, TRI); +/// Eliminate a REG_SEQUENCE instruction as part of the de-ssa process. +/// +/// The instruction is turned into a sequence of sub-register copies: +/// +/// %dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1 +/// +/// Becomes: +/// +/// %dst:ssub0 = COPY %v1 +/// %dst:ssub1 = COPY %v2 +/// +void TwoAddressInstructionPass:: +eliminateRegSequence(MachineBasicBlock::iterator &MBBI) { + MachineInstr *MI = MBBI; + unsigned DstReg = MI->getOperand(0).getReg(); + if (MI->getOperand(0).getSubReg() || + TargetRegisterInfo::isPhysicalRegister(DstReg) || + !(MI->getNumOperands() & 1)) { + DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); + llvm_unreachable(0); } -} - -// Find the first def of Reg, assuming they are all in the same basic block. -static MachineInstr *findFirstDef(unsigned Reg, MachineRegisterInfo *MRI) { - SmallPtrSet Defs; - MachineInstr *First = 0; - for (MachineRegisterInfo::def_iterator RI = MRI->def_begin(Reg); - MachineInstr *MI = RI.skipInstruction(); Defs.insert(MI)) - First = MI; - if (!First) - return 0; - - MachineBasicBlock *MBB = First->getParent(); - MachineBasicBlock::iterator A = First, B = First; - bool Moving; - do { - Moving = false; - if (A != MBB->begin()) { - Moving = true; - --A; - if (Defs.erase(A)) First = A; - } - if (B != MBB->end()) { - Defs.erase(B); - ++B; - Moving = true; - } - } while (Moving && !Defs.empty()); - assert(Defs.empty() && "Instructions outside basic block!"); - return First; -} -static bool HasOtherRegSequenceUses(unsigned Reg, MachineInstr *RegSeq, - MachineRegisterInfo *MRI) { - for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(Reg), - UE = MRI->use_end(); UI != UE; ++UI) { - MachineInstr *UseMI = &*UI; - if (UseMI != RegSeq && UseMI->isRegSequence()) - return true; + SmallVector OrigRegs; + if (LIS) { + OrigRegs.push_back(MI->getOperand(0).getReg()); + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) + OrigRegs.push_back(MI->getOperand(i).getReg()); } - return false; -} -/// eliminateRegSequences - Eliminate REG_SEQUENCE instructions as part -/// of the de-ssa process. This replaces sources of REG_SEQUENCE as -/// sub-register references of the register defined by REG_SEQUENCE. e.g. -/// -/// %reg1029, %reg1030 = VLD1q16 %reg1024, ... -/// %reg1031 = REG_SEQUENCE %reg1029, 5, %reg1030, 6 -/// => -/// %reg1031:5, %reg1031:6 = VLD1q16 %reg1024, ... 
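The replacement eliminateRegSequence() documented above turns one REG_SEQUENCE into a chain of sub-register COPYs, skipping undef operands and marking the first emitted def undef because no lane of the destination is live yet. A small standalone sketch of that expansion follows, with invented record types standing in for MachineOperand/MachineInstr.

#include <string>
#include <vector>

// Illustrative stand-ins for the operands of a REG_SEQUENCE: (source
// register, sub-register index) pairs feeding one destination register.
struct SeqOperand { std::string Src; unsigned SubIdx; bool IsUndef; };

struct SubRegCopy {
  std::string Dst;       // destination register
  unsigned SubIdx;       // sub-register lane being defined
  std::string Src;       // source register
  bool UndefDef;         // first def reads no live value of Dst
};

// Expand "%dst = REG_SEQUENCE %v1, ssub0, %v2, ssub1, ..." into one
// sub-register COPY per defined lane, following the pattern of
// eliminateRegSequence(): undef operands produce no copy, and the first
// emitted copy carries an undef flag on its def.
std::vector<SubRegCopy> expandRegSequence(const std::string &Dst,
                                          const std::vector<SeqOperand> &Ops) {
  std::vector<SubRegCopy> Copies;
  bool DefEmitted = false;
  for (const SeqOperand &Op : Ops) {
    if (Op.IsUndef)
      continue;                                   // nothing to insert
    Copies.push_back({Dst, Op.SubIdx, Op.Src, /*UndefDef=*/!DefEmitted});
    DefEmitted = true;
  }
  // If no copy was emitted, the real pass rewrites the REG_SEQUENCE into an
  // IMPLICIT_DEF instead of deleting it.
  return Copies;
}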
-bool TwoAddressInstructionPass::eliminateRegSequences() { - if (RegSequences.empty()) - return false; - - for (unsigned i = 0, e = RegSequences.size(); i != e; ++i) { - MachineInstr *MI = RegSequences[i]; - unsigned DstReg = MI->getOperand(0).getReg(); - if (MI->getOperand(0).getSubReg() || - TargetRegisterInfo::isPhysicalRegister(DstReg) || - !(MI->getNumOperands() & 1)) { - DEBUG(dbgs() << "Illegal REG_SEQUENCE instruction:" << *MI); - llvm_unreachable(0); - } - - bool IsImpDef = true; - SmallVector RealSrcs; - SmallSet Seen; - for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { - // Nothing needs to be inserted for operands. - if (MI->getOperand(i).isUndef()) { - MI->getOperand(i).setReg(0); - continue; - } - unsigned SrcReg = MI->getOperand(i).getReg(); - unsigned SrcSubIdx = MI->getOperand(i).getSubReg(); - unsigned SubIdx = MI->getOperand(i+1).getImm(); - // DefMI of NULL means the value does not have a vreg in this block - // i.e., its a physical register or a subreg. - // In either case we force a copy to be generated. - MachineInstr *DefMI = NULL; - if (!MI->getOperand(i).getSubReg() && - !TargetRegisterInfo::isPhysicalRegister(SrcReg)) { - DefMI = MRI->getUniqueVRegDef(SrcReg); - } + bool DefEmitted = false; + for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { + MachineOperand &UseMO = MI->getOperand(i); + unsigned SrcReg = UseMO.getReg(); + unsigned SubIdx = MI->getOperand(i+1).getImm(); + // Nothing needs to be inserted for operands. + if (UseMO.isUndef()) + continue; - if (DefMI && DefMI->isImplicitDef()) { - DefMI->eraseFromParent(); - continue; - } - IsImpDef = false; - - // Remember COPY sources. These might be candidate for coalescing. - if (DefMI && DefMI->isCopy() && DefMI->getOperand(1).getSubReg()) - RealSrcs.push_back(DefMI->getOperand(1).getReg()); - - bool isKill = MI->getOperand(i).isKill(); - if (!DefMI || !Seen.insert(SrcReg) || - MI->getParent() != DefMI->getParent() || - !isKill || HasOtherRegSequenceUses(SrcReg, MI, MRI) || - !TRI->getMatchingSuperRegClass(MRI->getRegClass(DstReg), - MRI->getRegClass(SrcReg), SubIdx)) { - // REG_SEQUENCE cannot have duplicated operands, add a copy. - // Also add an copy if the source is live-in the block. We don't want - // to end up with a partial-redef of a livein, e.g. - // BB0: - // reg1051:10 = - // ... - // BB1: - // ... = reg1051:10 - // BB2: - // reg1051:9 = - // LiveIntervalAnalysis won't like it. - // - // If the REG_SEQUENCE doesn't kill its source, keeping live variables - // correctly up to date becomes very difficult. Insert a copy. - - // Defer any kill flag to the last operand using SrcReg. Otherwise, we - // might insert a COPY that uses SrcReg after is was killed. - if (isKill) - for (unsigned j = i + 2; j < e; j += 2) - if (MI->getOperand(j).getReg() == SrcReg) { - MI->getOperand(j).setIsKill(); - isKill = false; - break; - } + // Defer any kill flag to the last operand using SrcReg. Otherwise, we + // might insert a COPY that uses SrcReg after is was killed. 
+ bool isKill = UseMO.isKill(); + if (isKill) + for (unsigned j = i + 2; j < e; j += 2) + if (MI->getOperand(j).getReg() == SrcReg) { + MI->getOperand(j).setIsKill(); + UseMO.setIsKill(false); + isKill = false; + break; + } - MachineBasicBlock::iterator InsertLoc = MI; - MachineInstr *CopyMI = BuildMI(*MI->getParent(), InsertLoc, - MI->getDebugLoc(), TII->get(TargetOpcode::COPY)) - .addReg(DstReg, RegState::Define, SubIdx) - .addReg(SrcReg, getKillRegState(isKill), SrcSubIdx); - MI->getOperand(i).setReg(0); - if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) - LV->replaceKillInstruction(SrcReg, MI, CopyMI); - DEBUG(dbgs() << "Inserted: " << *CopyMI); - } + // Insert the sub-register copy. + MachineInstr *CopyMI = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), + TII->get(TargetOpcode::COPY)) + .addReg(DstReg, RegState::Define, SubIdx) + .addOperand(UseMO); + + // The first def needs an flag because there is no live register + // before it. + if (!DefEmitted) { + CopyMI->getOperand(0).setIsUndef(true); + // Return an iterator pointing to the first inserted instr. + MBBI = CopyMI; } + DefEmitted = true; - for (unsigned i = 1, e = MI->getNumOperands(); i < e; i += 2) { - unsigned SrcReg = MI->getOperand(i).getReg(); - if (!SrcReg) continue; - unsigned SubIdx = MI->getOperand(i+1).getImm(); - UpdateRegSequenceSrcs(SrcReg, DstReg, SubIdx, MRI, *TRI); - } + // Update LiveVariables' kill info. + if (LV && isKill && !TargetRegisterInfo::isPhysicalRegister(SrcReg)) + LV->replaceKillInstruction(SrcReg, MI, CopyMI); - // Set flags on the first DstReg def in the basic block. - // It marks the beginning of the live range. All the other defs are - // read-modify-write. - if (MachineInstr *Def = findFirstDef(DstReg, MRI)) { - for (unsigned i = 0, e = Def->getNumOperands(); i != e; ++i) { - MachineOperand &MO = Def->getOperand(i); - if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg) - MO.setIsUndef(); - } - DEBUG(dbgs() << "First def: " << *Def); - } + DEBUG(dbgs() << "Inserted: " << *CopyMI); + } - if (IsImpDef) { - DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); - MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); - for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) - MI->RemoveOperand(j); - } else { - DEBUG(dbgs() << "Eliminated: " << *MI); - MI->eraseFromParent(); - } + MachineBasicBlock::iterator EndMBBI = + llvm::next(MachineBasicBlock::iterator(MI)); + + if (!DefEmitted) { + DEBUG(dbgs() << "Turned: " << *MI << " into an IMPLICIT_DEF"); + MI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF)); + for (int j = MI->getNumOperands() - 1, ee = 0; j > ee; --j) + MI->RemoveOperand(j); + } else { + DEBUG(dbgs() << "Eliminated: " << *MI); + MI->eraseFromParent(); } - RegSequences.clear(); - return true; + // Udpate LiveIntervals. 
+ if (LIS) + LIS->repairIntervalsInRange(MBB, MBBI, EndMBBI, OrigRegs); } diff --git a/lib/CodeGen/UnreachableBlockElim.cpp b/lib/CodeGen/UnreachableBlockElim.cpp index 52693f03e828..a95ebcd16da8 100644 --- a/lib/CodeGen/UnreachableBlockElim.cpp +++ b/lib/CodeGen/UnreachableBlockElim.cpp @@ -21,22 +21,22 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/Passes.h" -#include "llvm/Constant.h" -#include "llvm/Instructions.h" -#include "llvm/Function.h" -#include "llvm/Pass.h" -#include "llvm/Type.h" +#include "llvm/ADT/DepthFirstIterator.h" +#include "llvm/ADT/SmallPtrSet.h" #include "llvm/Analysis/Dominators.h" #include "llvm/Analysis/ProfileInfo.h" #include "llvm/CodeGen/MachineDominators.h" #include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineLoopInfo.h" +#include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/IR/Constant.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/Type.h" +#include "llvm/Pass.h" #include "llvm/Support/CFG.h" #include "llvm/Target/TargetInstrInfo.h" -#include "llvm/ADT/DepthFirstIterator.h" -#include "llvm/ADT/SmallPtrSet.h" using namespace llvm; namespace { diff --git a/lib/CodeGen/VirtRegMap.cpp b/lib/CodeGen/VirtRegMap.cpp index bb93bdc0bc25..cd012d297489 100644 --- a/lib/CodeGen/VirtRegMap.cpp +++ b/lib/CodeGen/VirtRegMap.cpp @@ -17,8 +17,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "regalloc" -#include "VirtRegMap.h" +#include "llvm/CodeGen/VirtRegMap.h" #include "LiveDebugVariables.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" #include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/LiveStackAnalysis.h" #include "llvm/CodeGen/MachineFrameInfo.h" @@ -26,15 +28,13 @@ #include "llvm/CodeGen/MachineInstrBuilder.h" #include "llvm/CodeGen/MachineRegisterInfo.h" #include "llvm/CodeGen/Passes.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetRegisterInfo.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/STLExtras.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetRegisterInfo.h" #include using namespace llvm; @@ -77,15 +77,22 @@ unsigned VirtRegMap::createSpillSlot(const TargetRegisterClass *RC) { return SS; } -unsigned VirtRegMap::getRegAllocPref(unsigned virtReg) { - std::pair Hint = MRI->getRegAllocationHint(virtReg); - unsigned physReg = Hint.second; - if (TargetRegisterInfo::isVirtualRegister(physReg) && hasPhys(physReg)) - physReg = getPhys(physReg); - if (Hint.first == 0) - return (TargetRegisterInfo::isPhysicalRegister(physReg)) - ? 
physReg : 0; - return TRI->ResolveRegAllocHint(Hint.first, physReg, *MF); +bool VirtRegMap::hasPreferredPhys(unsigned VirtReg) { + unsigned Hint = MRI->getSimpleHint(VirtReg); + if (!Hint) + return 0; + if (TargetRegisterInfo::isVirtualRegister(Hint)) + Hint = getPhys(Hint); + return getPhys(VirtReg) == Hint; +} + +bool VirtRegMap::hasKnownPreference(unsigned VirtReg) { + std::pair Hint = MRI->getRegAllocationHint(VirtReg); + if (TargetRegisterInfo::isPhysicalRegister(Hint.second)) + return true; + if (TargetRegisterInfo::isVirtualRegister(Hint.second)) + return hasPhys(Hint.second); + return false; } int VirtRegMap::assignVirt2StackSlot(unsigned virtReg) { diff --git a/lib/CodeGen/VirtRegMap.h b/lib/CodeGen/VirtRegMap.h deleted file mode 100644 index 7974dda66a5f..000000000000 --- a/lib/CodeGen/VirtRegMap.h +++ /dev/null @@ -1,190 +0,0 @@ -//===-- llvm/CodeGen/VirtRegMap.h - Virtual Register Map -*- C++ -*--------===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file implements a virtual register map. This maps virtual registers to -// physical registers and virtual registers to stack slots. It is created and -// updated by a register allocator and then used by a machine code rewriter that -// adds spill code and rewrites virtual into physical register references. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_CODEGEN_VIRTREGMAP_H -#define LLVM_CODEGEN_VIRTREGMAP_H - -#include "llvm/CodeGen/MachineFunctionPass.h" -#include "llvm/Target/TargetRegisterInfo.h" -#include "llvm/ADT/IndexedMap.h" - -namespace llvm { - class MachineInstr; - class MachineFunction; - class MachineRegisterInfo; - class TargetInstrInfo; - class raw_ostream; - class SlotIndexes; - - class VirtRegMap : public MachineFunctionPass { - public: - enum { - NO_PHYS_REG = 0, - NO_STACK_SLOT = (1L << 30)-1, - MAX_STACK_SLOT = (1L << 18)-1 - }; - - private: - MachineRegisterInfo *MRI; - const TargetInstrInfo *TII; - const TargetRegisterInfo *TRI; - MachineFunction *MF; - - /// Virt2PhysMap - This is a virtual to physical register - /// mapping. Each virtual register is required to have an entry in - /// it; even spilled virtual registers (the register mapped to a - /// spilled register is the temporary used to load it from the - /// stack). - IndexedMap Virt2PhysMap; - - /// Virt2StackSlotMap - This is virtual register to stack slot - /// mapping. Each spilled virtual register has an entry in it - /// which corresponds to the stack slot this register is spilled - /// at. - IndexedMap Virt2StackSlotMap; - - /// Virt2SplitMap - This is virtual register to splitted virtual register - /// mapping. - IndexedMap Virt2SplitMap; - - /// createSpillSlot - Allocate a spill slot for RC from MFI. 
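The VirtRegMap.cpp hunk above drops getRegAllocPref() in favour of two narrower queries: hasPreferredPhys() resolves a simple hint (following a virtual hint through the current assignment) and compares it with the register's mapping, while hasKnownPreference() only reports whether a usable hint exists. Here is a rough standalone sketch of the first query using ordinary maps; the register-numbering convention and helper names are assumptions for illustration, not VirtRegMap's API.

#include <unordered_map>

// Illustrative convention: virtual registers are ids >= 1000, physical
// registers are ids in [1, 1000), and 0 means "no hint / unassigned".
constexpr unsigned FirstVirtReg = 1000;
inline bool isVirtual(unsigned Reg) { return Reg >= FirstVirtReg; }

struct SimpleVirtRegMap {
  std::unordered_map<unsigned, unsigned> Virt2Phys;  // assignments so far
  std::unordered_map<unsigned, unsigned> SimpleHint; // allocation hints

  unsigned getPhys(unsigned VirtReg) const {
    auto It = Virt2Phys.find(VirtReg);
    return It == Virt2Phys.end() ? 0 : It->second;
  }

  // Following the pattern of the new hasPreferredPhys(): resolve a virtual
  // hint through the current assignment, then check whether VirtReg landed
  // on that register.
  bool hasPreferredPhys(unsigned VirtReg) const {
    auto HintIt = SimpleHint.find(VirtReg);
    unsigned Hint = HintIt == SimpleHint.end() ? 0 : HintIt->second;
    if (!Hint)
      return false;                    // no preference recorded
    if (isVirtual(Hint))
      Hint = getPhys(Hint);            // follow the hinted virtual register
    return Hint && getPhys(VirtReg) == Hint;
  }
};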
- unsigned createSpillSlot(const TargetRegisterClass *RC); - - VirtRegMap(const VirtRegMap&) LLVM_DELETED_FUNCTION; - void operator=(const VirtRegMap&) LLVM_DELETED_FUNCTION; - - public: - static char ID; - VirtRegMap() : MachineFunctionPass(ID), Virt2PhysMap(NO_PHYS_REG), - Virt2StackSlotMap(NO_STACK_SLOT), Virt2SplitMap(0) { } - virtual bool runOnMachineFunction(MachineFunction &MF); - - virtual void getAnalysisUsage(AnalysisUsage &AU) const { - AU.setPreservesAll(); - MachineFunctionPass::getAnalysisUsage(AU); - } - - MachineFunction &getMachineFunction() const { - assert(MF && "getMachineFunction called before runOnMachineFunction"); - return *MF; - } - - MachineRegisterInfo &getRegInfo() const { return *MRI; } - const TargetRegisterInfo &getTargetRegInfo() const { return *TRI; } - - void grow(); - - /// @brief returns true if the specified virtual register is - /// mapped to a physical register - bool hasPhys(unsigned virtReg) const { - return getPhys(virtReg) != NO_PHYS_REG; - } - - /// @brief returns the physical register mapped to the specified - /// virtual register - unsigned getPhys(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2PhysMap[virtReg]; - } - - /// @brief creates a mapping for the specified virtual register to - /// the specified physical register - void assignVirt2Phys(unsigned virtReg, unsigned physReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg) && - TargetRegisterInfo::isPhysicalRegister(physReg)); - assert(Virt2PhysMap[virtReg] == NO_PHYS_REG && - "attempt to assign physical register to already mapped " - "virtual register"); - Virt2PhysMap[virtReg] = physReg; - } - - /// @brief clears the specified virtual register's, physical - /// register mapping - void clearVirt(unsigned virtReg) { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - assert(Virt2PhysMap[virtReg] != NO_PHYS_REG && - "attempt to clear a not assigned virtual register"); - Virt2PhysMap[virtReg] = NO_PHYS_REG; - } - - /// @brief clears all virtual to physical register mappings - void clearAllVirt() { - Virt2PhysMap.clear(); - grow(); - } - - /// @brief returns the register allocation preference. - unsigned getRegAllocPref(unsigned virtReg); - - /// @brief returns true if VirtReg is assigned to its preferred physreg. - bool hasPreferredPhys(unsigned VirtReg) { - return getPhys(VirtReg) == getRegAllocPref(VirtReg); - } - - /// @brief records virtReg is a split live interval from SReg. - void setIsSplitFromReg(unsigned virtReg, unsigned SReg) { - Virt2SplitMap[virtReg] = SReg; - } - - /// @brief returns the live interval virtReg is split from. - unsigned getPreSplitReg(unsigned virtReg) const { - return Virt2SplitMap[virtReg]; - } - - /// getOriginal - Return the original virtual register that VirtReg descends - /// from through splitting. - /// A register that was not created by splitting is its own original. - /// This operation is idempotent. - unsigned getOriginal(unsigned VirtReg) const { - unsigned Orig = getPreSplitReg(VirtReg); - return Orig ? Orig : VirtReg; - } - - /// @brief returns true if the specified virtual register is not - /// mapped to a stack slot or rematerialized. - bool isAssignedReg(unsigned virtReg) const { - if (getStackSlot(virtReg) == NO_STACK_SLOT) - return true; - // Split register can be assigned a physical register as well as a - // stack slot or remat id. 
- return (Virt2SplitMap[virtReg] && Virt2PhysMap[virtReg] != NO_PHYS_REG); - } - - /// @brief returns the stack slot mapped to the specified virtual - /// register - int getStackSlot(unsigned virtReg) const { - assert(TargetRegisterInfo::isVirtualRegister(virtReg)); - return Virt2StackSlotMap[virtReg]; - } - - /// @brief create a mapping for the specifed virtual register to - /// the next available stack slot - int assignVirt2StackSlot(unsigned virtReg); - /// @brief create a mapping for the specified virtual register to - /// the specified stack slot - void assignVirt2StackSlot(unsigned virtReg, int frameIndex); - - void print(raw_ostream &OS, const Module* M = 0) const; - void dump() const; - }; - - inline raw_ostream &operator<<(raw_ostream &OS, const VirtRegMap &VRM) { - VRM.print(OS); - return OS; - } -} // End llvm namespace - -#endif diff --git a/lib/DebugInfo/CMakeLists.txt b/lib/DebugInfo/CMakeLists.txt index 1e9e509fd2a1..e97455abace2 100644 --- a/lib/DebugInfo/CMakeLists.txt +++ b/lib/DebugInfo/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMDebugInfo DWARFDebugAbbrev.cpp DWARFDebugArangeSet.cpp DWARFDebugAranges.cpp + DWARFDebugFrame.cpp DWARFDebugInfoEntry.cpp DWARFDebugLine.cpp DWARFDebugRangeList.cpp diff --git a/lib/DebugInfo/DIContext.cpp b/lib/DebugInfo/DIContext.cpp index 691a92c392c2..49a44097d3e2 100644 --- a/lib/DebugInfo/DIContext.cpp +++ b/lib/DebugInfo/DIContext.cpp @@ -13,15 +13,6 @@ using namespace llvm; DIContext::~DIContext() {} -DIContext *DIContext::getDWARFContext(bool isLittleEndian, - StringRef infoSection, - StringRef abbrevSection, - StringRef aRangeSection, - StringRef lineSection, - StringRef stringSection, - StringRef rangeSection, - const RelocAddrMap &Map) { - return new DWARFContextInMemory(isLittleEndian, infoSection, abbrevSection, - aRangeSection, lineSection, stringSection, - rangeSection, Map); +DIContext *DIContext::getDWARFContext(object::ObjectFile *Obj) { + return new DWARFContextInMemory(Obj); } diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp index 0df692c3a3b7..2de62ab9380d 100644 --- a/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp +++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.cpp @@ -23,7 +23,7 @@ bool DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr, uint32_t code) { Code = code; - Attributes.clear(); + Attribute.clear(); if (Code) { Tag = data.getULEB128(offset_ptr); HasChildren = data.getU8(offset_ptr); @@ -33,7 +33,7 @@ DWARFAbbreviationDeclaration::extract(DataExtractor data, uint32_t* offset_ptr, uint16_t form = data.getULEB128(offset_ptr); if (attr && form) - Attributes.push_back(DWARFAttribute(attr, form)); + Attribute.push_back(DWARFAttribute(attr, form)); else break; } @@ -55,19 +55,19 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const { else OS << format("DW_TAG_Unknown_%x", getTag()); OS << "\tDW_CHILDREN_" << (hasChildren() ? 
"yes" : "no") << '\n'; - for (unsigned i = 0, e = Attributes.size(); i != e; ++i) { + for (unsigned i = 0, e = Attribute.size(); i != e; ++i) { OS << '\t'; - const char *attrString = AttributeString(Attributes[i].getAttribute()); + const char *attrString = AttributeString(Attribute[i].getAttribute()); if (attrString) OS << attrString; else - OS << format("DW_AT_Unknown_%x", Attributes[i].getAttribute()); + OS << format("DW_AT_Unknown_%x", Attribute[i].getAttribute()); OS << '\t'; - const char *formString = FormEncodingString(Attributes[i].getForm()); + const char *formString = FormEncodingString(Attribute[i].getForm()); if (formString) OS << formString; else - OS << format("DW_FORM_Unknown_%x", Attributes[i].getForm()); + OS << format("DW_FORM_Unknown_%x", Attribute[i].getForm()); OS << '\n'; } OS << '\n'; @@ -75,8 +75,8 @@ void DWARFAbbreviationDeclaration::dump(raw_ostream &OS) const { uint32_t DWARFAbbreviationDeclaration::findAttributeIndex(uint16_t attr) const { - for (uint32_t i = 0, e = Attributes.size(); i != e; ++i) { - if (Attributes[i].getAttribute() == attr) + for (uint32_t i = 0, e = Attribute.size(); i != e; ++i) { + if (Attribute[i].getAttribute() == attr) return i; } return -1U; diff --git a/lib/DebugInfo/DWARFAbbreviationDeclaration.h b/lib/DebugInfo/DWARFAbbreviationDeclaration.h index 2463a3cc0494..9a3fcd8a783c 100644 --- a/lib/DebugInfo/DWARFAbbreviationDeclaration.h +++ b/lib/DebugInfo/DWARFAbbreviationDeclaration.h @@ -22,7 +22,7 @@ class DWARFAbbreviationDeclaration { uint32_t Code; uint32_t Tag; bool HasChildren; - SmallVector Attributes; + SmallVector Attribute; public: enum { InvalidCode = 0 }; DWARFAbbreviationDeclaration() @@ -31,12 +31,12 @@ public: uint32_t getCode() const { return Code; } uint32_t getTag() const { return Tag; } bool hasChildren() const { return HasChildren; } - uint32_t getNumAttributes() const { return Attributes.size(); } + uint32_t getNumAttributes() const { return Attribute.size(); } uint16_t getAttrByIndex(uint32_t idx) const { - return Attributes.size() > idx ? Attributes[idx].getAttribute() : 0; + return Attribute.size() > idx ? Attribute[idx].getAttribute() : 0; } uint16_t getFormByIndex(uint32_t idx) const { - return Attributes.size() > idx ? Attributes[idx].getForm() : 0; + return Attribute.size() > idx ? 
Attribute[idx].getForm() : 0; } uint32_t findAttributeIndex(uint16_t attr) const; @@ -45,7 +45,7 @@ public: bool isValid() const { return Code != 0 && Tag != 0; } void dump(raw_ostream &OS) const; const SmallVectorImpl &getAttributes() const { - return Attributes; + return Attribute; } }; diff --git a/lib/DebugInfo/DWARFCompileUnit.cpp b/lib/DebugInfo/DWARFCompileUnit.cpp index bdd65b77e4b6..e3e4ccd7d9e1 100644 --- a/lib/DebugInfo/DWARFCompileUnit.cpp +++ b/lib/DebugInfo/DWARFCompileUnit.cpp @@ -17,8 +17,7 @@ using namespace llvm; using namespace dwarf; DataExtractor DWARFCompileUnit::getDebugInfoExtractor() const { - return DataExtractor(Context.getInfoSection(), - Context.isLittleEndian(), getAddressByteSize()); + return DataExtractor(InfoSection, isLittleEndian, AddrSize); } bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) { @@ -28,7 +27,6 @@ bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) { if (debug_info.isValidOffset(*offset_ptr)) { uint64_t abbrOffset; - const DWARFDebugAbbrev *abbr = Context.getDebugAbbrev(); Length = debug_info.getU32(offset_ptr); Version = debug_info.getU16(offset_ptr); abbrOffset = debug_info.getU32(offset_ptr); @@ -36,11 +34,11 @@ bool DWARFCompileUnit::extract(DataExtractor debug_info, uint32_t *offset_ptr) { bool lengthOK = debug_info.isValidOffset(getNextCompileUnitOffset()-1); bool versionOK = DWARFContext::isSupportedVersion(Version); - bool abbrOffsetOK = Context.getAbbrevSection().size() > abbrOffset; + bool abbrOffsetOK = AbbrevSection.size() > abbrOffset; bool addrSizeOK = AddrSize == 4 || AddrSize == 8; - if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && abbr != NULL) { - Abbrevs = abbr->getAbbreviationDeclarationSet(abbrOffset); + if (lengthOK && versionOK && addrSizeOK && abbrOffsetOK && Abbrev != NULL) { + Abbrevs = Abbrev->getAbbreviationDeclarationSet(abbrOffset); return true; } @@ -79,8 +77,7 @@ bool DWARFCompileUnit::extractRangeList(uint32_t RangeListOffset, DWARFDebugRangeList &RangeList) const { // Require that compile unit is extracted. assert(DieArray.size() > 0); - DataExtractor RangesData(Context.getRangeSection(), - Context.isLittleEndian(), AddrSize); + DataExtractor RangesData(RangeSection, isLittleEndian, AddrSize); return RangeList.extract(RangesData, &RangeListOffset); } @@ -211,7 +208,7 @@ size_t DWARFCompileUnit::extractDIEsIfNeeded(bool cu_die_only) { // should always terminate at or before the start of the next compilation // unit header). 
if (offset > next_cu_offset) - fprintf(stderr, "warning: DWARF compile unit extends beyond its" + fprintf(stderr, "warning: DWARF compile unit extends beyond its " "bounds cu 0x%8.8x at 0x%8.8x'\n", getOffset(), offset); setDIERelations(); diff --git a/lib/DebugInfo/DWARFCompileUnit.h b/lib/DebugInfo/DWARFCompileUnit.h index 03e28620d4b3..2a74605fcb2d 100644 --- a/lib/DebugInfo/DWARFCompileUnit.h +++ b/lib/DebugInfo/DWARFCompileUnit.h @@ -13,15 +13,25 @@ #include "DWARFDebugAbbrev.h" #include "DWARFDebugInfoEntry.h" #include "DWARFDebugRangeList.h" +#include "DWARFRelocMap.h" #include namespace llvm { -class DWARFContext; +class DWARFDebugAbbrev; +class StringRef; class raw_ostream; class DWARFCompileUnit { - DWARFContext &Context; + const DWARFDebugAbbrev *Abbrev; + StringRef InfoSection; + StringRef AbbrevSection; + StringRef RangeSection; + StringRef StringSection; + StringRef StringOffsetSection; + StringRef AddrOffsetSection; + const RelocAddrMap *RelocMap; + bool isLittleEndian; uint32_t Offset; uint32_t Length; @@ -32,11 +42,20 @@ class DWARFCompileUnit { // The compile unit debug information entry item. std::vector DieArray; public: - DWARFCompileUnit(DWARFContext &context) : Context(context) { + + DWARFCompileUnit(const DWARFDebugAbbrev *DA, StringRef IS, StringRef AS, + StringRef RS, StringRef SS, StringRef SOS, StringRef AOS, + const RelocAddrMap *M, bool LE) : + Abbrev(DA), InfoSection(IS), AbbrevSection(AS), + RangeSection(RS), StringSection(SS), StringOffsetSection(SOS), + AddrOffsetSection(AOS), RelocMap(M), isLittleEndian(LE) { clear(); } - DWARFContext &getContext() const { return Context; } + StringRef getStringSection() const { return StringSection; } + StringRef getStringOffsetSection() const { return StringOffsetSection; } + StringRef getAddrOffsetSection() const { return AddrOffsetSection; } + const RelocAddrMap *getRelocMap() const { return RelocMap; } DataExtractor getDebugInfoExtractor() const; bool extract(DataExtractor debug_info, uint32_t* offset_ptr); diff --git a/lib/DebugInfo/DWARFContext.cpp b/lib/DebugInfo/DWARFContext.cpp index afd614cc356e..9e19310a99c0 100644 --- a/lib/DebugInfo/DWARFContext.cpp +++ b/lib/DebugInfo/DWARFContext.cpp @@ -19,57 +19,124 @@ using namespace dwarf; typedef DWARFDebugLine::LineTable DWARFLineTable; -void DWARFContext::dump(raw_ostream &OS) { - OS << ".debug_abbrev contents:\n"; - getDebugAbbrev()->dump(OS); +void DWARFContext::dump(raw_ostream &OS, DIDumpType DumpType) { + if (DumpType == DIDT_All || DumpType == DIDT_Abbrev) { + OS << ".debug_abbrev contents:\n"; + getDebugAbbrev()->dump(OS); + } - OS << "\n.debug_info contents:\n"; - for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) - getCompileUnitAtIndex(i)->dump(OS); + if (DumpType == DIDT_All || DumpType == DIDT_Info) { + OS << "\n.debug_info contents:\n"; + for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) + getCompileUnitAtIndex(i)->dump(OS); + } + + if (DumpType == DIDT_All || DumpType == DIDT_Frames) { + OS << "\n.debug_frame contents:\n"; + getDebugFrame()->dump(OS); + } - OS << "\n.debug_aranges contents:\n"; - DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0); uint32_t offset = 0; - DWARFDebugArangeSet set; - while (set.extract(arangesData, &offset)) - set.dump(OS); + if (DumpType == DIDT_All || DumpType == DIDT_Aranges) { + OS << "\n.debug_aranges contents:\n"; + DataExtractor arangesData(getARangeSection(), isLittleEndian(), 0); + DWARFDebugArangeSet set; + while (set.extract(arangesData, &offset)) + set.dump(OS); + } uint8_t 
savedAddressByteSize = 0; - OS << "\n.debug_lines contents:\n"; - for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { - DWARFCompileUnit *cu = getCompileUnitAtIndex(i); - savedAddressByteSize = cu->getAddressByteSize(); - unsigned stmtOffset = - cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, - -1U); - if (stmtOffset != -1U) { - DataExtractor lineData(getLineSection(), isLittleEndian(), + if (DumpType == DIDT_All || DumpType == DIDT_Line) { + OS << "\n.debug_line contents:\n"; + for (unsigned i = 0, e = getNumCompileUnits(); i != e; ++i) { + DWARFCompileUnit *cu = getCompileUnitAtIndex(i); + savedAddressByteSize = cu->getAddressByteSize(); + unsigned stmtOffset = + cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, + -1U); + if (stmtOffset != -1U) { + DataExtractor lineData(getLineSection(), isLittleEndian(), + savedAddressByteSize); + DWARFDebugLine::DumpingState state(OS); + DWARFDebugLine::parseStatementTable(lineData, &lineRelocMap(), &stmtOffset, state); + } + } + } + + if (DumpType == DIDT_All || DumpType == DIDT_Str) { + OS << "\n.debug_str contents:\n"; + DataExtractor strData(getStringSection(), isLittleEndian(), 0); + offset = 0; + uint32_t strOffset = 0; + while (const char *s = strData.getCStr(&offset)) { + OS << format("0x%8.8x: \"%s\"\n", strOffset, s); + strOffset = offset; + } + } + + if (DumpType == DIDT_All || DumpType == DIDT_Ranges) { + OS << "\n.debug_ranges contents:\n"; + // In fact, different compile units may have different address byte + // sizes, but for simplicity we just use the address byte size of the last + // compile unit (there is no easy and fast way to associate address range + // list and the compile unit it describes). + DataExtractor rangesData(getRangeSection(), isLittleEndian(), savedAddressByteSize); - DWARFDebugLine::DumpingState state(OS); - DWARFDebugLine::parseStatementTable(lineData, &stmtOffset, state); + offset = 0; + DWARFDebugRangeList rangeList; + while (rangeList.extract(rangesData, &offset)) + rangeList.dump(OS); + } + + if (DumpType == DIDT_All || DumpType == DIDT_Pubnames) { + OS << "\n.debug_pubnames contents:\n"; + DataExtractor pubNames(getPubNamesSection(), isLittleEndian(), 0); + offset = 0; + OS << "Length: " << pubNames.getU32(&offset) << "\n"; + OS << "Version: " << pubNames.getU16(&offset) << "\n"; + OS << "Offset in .debug_info: " << pubNames.getU32(&offset) << "\n"; + OS << "Size: " << pubNames.getU32(&offset) << "\n"; + OS << "\n Offset Name\n"; + while (offset < getPubNamesSection().size()) { + uint32_t n = pubNames.getU32(&offset); + if (n == 0) + break; + OS << format("%8x ", n); + OS << pubNames.getCStr(&offset) << "\n"; } } - OS << "\n.debug_str contents:\n"; - DataExtractor strData(getStringSection(), isLittleEndian(), 0); - offset = 0; - uint32_t lastOffset = 0; - while (const char *s = strData.getCStr(&offset)) { - OS << format("0x%8.8x: \"%s\"\n", lastOffset, s); - lastOffset = offset; + if (DumpType == DIDT_All || DumpType == DIDT_AbbrevDwo) { + OS << "\n.debug_abbrev.dwo contents:\n"; + getDebugAbbrevDWO()->dump(OS); + } + + if (DumpType == DIDT_All || DumpType == DIDT_InfoDwo) { + OS << "\n.debug_info.dwo contents:\n"; + for (unsigned i = 0, e = getNumDWOCompileUnits(); i != e; ++i) + getDWOCompileUnitAtIndex(i)->dump(OS); + } + + if (DumpType == DIDT_All || DumpType == DIDT_StrDwo) { + OS << "\n.debug_str.dwo contents:\n"; + DataExtractor strDWOData(getStringDWOSection(), isLittleEndian(), 0); + offset = 0; + uint32_t strDWOOffset = 0; + while (const 
char *s = strDWOData.getCStr(&offset)) { + OS << format("0x%8.8x: \"%s\"\n", strDWOOffset, s); + strDWOOffset = offset; + } } - OS << "\n.debug_ranges contents:\n"; - // In fact, different compile units may have different address byte - // sizes, but for simplicity we just use the address byte size of the last - // compile unit (there is no easy and fast way to associate address range - // list and the compile unit it describes). - DataExtractor rangesData(getRangeSection(), isLittleEndian(), - savedAddressByteSize); - offset = 0; - DWARFDebugRangeList rangeList; - while (rangeList.extract(rangesData, &offset)) - rangeList.dump(OS); + if (DumpType == DIDT_All || DumpType == DIDT_StrOffsetsDwo) { + OS << "\n.debug_str_offsets.dwo contents:\n"; + DataExtractor strOffsetExt(getStringOffsetDWOSection(), isLittleEndian(), 0); + offset = 0; + while (offset < getStringOffsetDWOSection().size()) { + OS << format("0x%8.8x: ", offset); + OS << format("%8.8x\n", strOffsetExt.getU32(&offset)); + } + } } const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { @@ -83,6 +150,16 @@ const DWARFDebugAbbrev *DWARFContext::getDebugAbbrev() { return Abbrev.get(); } +const DWARFDebugAbbrev *DWARFContext::getDebugAbbrevDWO() { + if (AbbrevDWO) + return AbbrevDWO.get(); + + DataExtractor abbrData(getAbbrevDWOSection(), isLittleEndian(), 0); + AbbrevDWO.reset(new DWARFDebugAbbrev()); + AbbrevDWO->parse(abbrData); + return AbbrevDWO.get(); +} + const DWARFDebugAranges *DWARFContext::getDebugAranges() { if (Aranges) return Aranges.get(); @@ -91,15 +168,37 @@ const DWARFDebugAranges *DWARFContext::getDebugAranges() { Aranges.reset(new DWARFDebugAranges()); Aranges->extract(arangesData); - if (Aranges->isEmpty()) // No aranges in file, generate them from the DIEs. - Aranges->generate(this); + // Generate aranges from DIEs: even if .debug_aranges section is present, + // it may describe only a small subset of compilation units, so we need to + // manually build aranges for the rest of them. + Aranges->generate(this); return Aranges.get(); } +const DWARFDebugFrame *DWARFContext::getDebugFrame() { + if (DebugFrame) + return DebugFrame.get(); + + // There's a "bug" in the DWARFv3 standard with respect to the target address + // size within debug frame sections. While DWARF is supposed to be independent + // of its container, FDEs have fields with size being "target address size", + // which isn't specified in DWARF in general. It's only specified for CUs, but + // .eh_frame can appear without a .debug_info section. Follow the example of + // other tools (libdwarf) and extract this from the container (ObjectFile + // provides this information). 
This problem is fixed in DWARFv4 + // See this dwarf-discuss discussion for more details: + // http://lists.dwarfstd.org/htdig.cgi/dwarf-discuss-dwarfstd.org/2011-December/001173.html + DataExtractor debugFrameData(getDebugFrameSection(), isLittleEndian(), + getAddressSize()); + DebugFrame.reset(new DWARFDebugFrame()); + DebugFrame->parse(debugFrameData); + return DebugFrame.get(); +} + const DWARFLineTable * DWARFContext::getLineTableForCompileUnit(DWARFCompileUnit *cu) { if (!Line) - Line.reset(new DWARFDebugLine()); + Line.reset(new DWARFDebugLine(&lineRelocMap())); unsigned stmtOffset = cu->getCompileUnitDIE()->getAttributeValueAsUnsigned(cu, DW_AT_stmt_list, @@ -122,7 +221,12 @@ void DWARFContext::parseCompileUnits() { const DataExtractor &DIData = DataExtractor(getInfoSection(), isLittleEndian(), 0); while (DIData.isValidOffset(offset)) { - CUs.push_back(DWARFCompileUnit(*this)); + CUs.push_back(DWARFCompileUnit(getDebugAbbrev(), getInfoSection(), + getAbbrevSection(), getRangeSection(), + getStringSection(), StringRef(), + getAddrSection(), + &infoRelocMap(), + isLittleEndian())); if (!CUs.back().extract(DIData, &offset)) { CUs.pop_back(); break; @@ -132,6 +236,28 @@ void DWARFContext::parseCompileUnits() { } } +void DWARFContext::parseDWOCompileUnits() { + uint32_t offset = 0; + const DataExtractor &DIData = DataExtractor(getInfoDWOSection(), + isLittleEndian(), 0); + while (DIData.isValidOffset(offset)) { + DWOCUs.push_back(DWARFCompileUnit(getDebugAbbrevDWO(), getInfoDWOSection(), + getAbbrevDWOSection(), + getRangeDWOSection(), + getStringDWOSection(), + getStringOffsetDWOSection(), + getAddrSection(), + &infoDWORelocMap(), + isLittleEndian())); + if (!DWOCUs.back().extract(DIData, &offset)) { + DWOCUs.pop_back(); + break; + } + + offset = DWOCUs.back().getNextCompileUnitOffset(); + } +} + namespace { struct OffsetComparator { bool operator()(const DWARFCompileUnit &LHS, @@ -242,6 +368,64 @@ DILineInfo DWARFContext::getLineInfoForAddress(uint64_t Address, Line, Column); } +DILineInfoTable DWARFContext::getLineInfoForAddressRange(uint64_t Address, + uint64_t Size, + DILineInfoSpecifier Specifier) { + DILineInfoTable Lines; + DWARFCompileUnit *CU = getCompileUnitForAddress(Address); + if (!CU) + return Lines; + + std::string FunctionName = ""; + if (Specifier.needs(DILineInfoSpecifier::FunctionName)) { + // The address may correspond to instruction in some inlined function, + // so we have to build the chain of inlined functions and take the + // name of the topmost function in it. + const DWARFDebugInfoEntryMinimal::InlinedChain &InlinedChain = + CU->getInlinedChainForAddress(Address); + if (InlinedChain.size() > 0) { + const DWARFDebugInfoEntryMinimal &TopFunctionDIE = InlinedChain[0]; + if (const char *Name = TopFunctionDIE.getSubroutineName(CU)) + FunctionName = Name; + } + } + + StringRef FuncNameRef = StringRef(FunctionName); + + // If the Specifier says we don't need FileLineInfo, just + // return the top-most function at the starting address. + if (!Specifier.needs(DILineInfoSpecifier::FileLineInfo)) { + Lines.push_back(std::make_pair(Address, + DILineInfo(StringRef(""), + FuncNameRef, 0, 0))); + return Lines; + } + + const DWARFLineTable *LineTable = getLineTableForCompileUnit(CU); + const bool NeedsAbsoluteFilePath = + Specifier.needs(DILineInfoSpecifier::AbsoluteFilePath); + + // Get the index of row we're looking for in the line table. 
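getLineInfoForAddressRange() above resolves a whole address range: every line-table row falling inside [Address, Address+Size) yields one (address, DILineInfo) pair. The following much-simplified model shows the row collection that lookupAddressRange() is asked to perform; Row, lookupRange and the sorted-by-address assumption are illustrative, not the DWARFDebugLine interface.

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative stand-in for a line-table row: the program address a source
// position starts at, plus file/line/column.
struct Row { uint64_t Address; unsigned File, Line, Column; };

// Collect the indexes of all rows whose address lies in [Start, Start+Size),
// assuming Rows is sorted by Address.
std::vector<uint32_t> lookupRange(const std::vector<Row> &Rows,
                                  uint64_t Start, uint64_t Size) {
  std::vector<uint32_t> Result;
  auto First = std::lower_bound(Rows.begin(), Rows.end(), Start,
                                [](const Row &R, uint64_t A) {
                                  return R.Address < A;
                                });
  for (auto It = First; It != Rows.end() && It->Address < Start + Size; ++It)
    Result.push_back(static_cast<uint32_t>(It - Rows.begin()));
  return Result;
}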
+ std::vector RowVector; + if (!LineTable->lookupAddressRange(Address, Size, RowVector)) + return Lines; + + uint32_t NumRows = RowVector.size(); + for (uint32_t i = 0; i < NumRows; ++i) { + uint32_t RowIndex = RowVector[i]; + // Take file number and line/column from the row. + const DWARFDebugLine::Row &Row = LineTable->Rows[RowIndex]; + std::string FileName = ""; + getFileNameForCompileUnit(CU, LineTable, Row.File, + NeedsAbsoluteFilePath, FileName); + Lines.push_back(std::make_pair(Row.Address, + DILineInfo(StringRef(FileName), + FuncNameRef, Row.Line, Row.Column))); + } + + return Lines; +} + DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier) { DWARFCompileUnit *CU = getCompileUnitForAddress(Address); @@ -298,4 +482,115 @@ DIInliningInfo DWARFContext::getInliningInfoForAddress(uint64_t Address, return InliningInfo; } +DWARFContextInMemory::DWARFContextInMemory(object::ObjectFile *Obj) : + IsLittleEndian(Obj->isLittleEndian()), + AddressSize(Obj->getBytesInAddress()) { + error_code ec; + for (object::section_iterator i = Obj->begin_sections(), + e = Obj->end_sections(); + i != e; i.increment(ec)) { + StringRef name; + i->getName(name); + StringRef data; + i->getContents(data); + + name = name.substr(name.find_first_not_of("._")); // Skip . and _ prefixes. + if (name == "debug_info") + InfoSection = data; + else if (name == "debug_abbrev") + AbbrevSection = data; + else if (name == "debug_line") + LineSection = data; + else if (name == "debug_aranges") + ARangeSection = data; + else if (name == "debug_frame") + DebugFrameSection = data; + else if (name == "debug_str") + StringSection = data; + else if (name == "debug_ranges") { + // FIXME: Use the other dwo range section when we emit it. + RangeDWOSection = data; + RangeSection = data; + } + else if (name == "debug_pubnames") + PubNamesSection = data; + else if (name == "debug_info.dwo") + InfoDWOSection = data; + else if (name == "debug_abbrev.dwo") + AbbrevDWOSection = data; + else if (name == "debug_str.dwo") + StringDWOSection = data; + else if (name == "debug_str_offsets.dwo") + StringOffsetDWOSection = data; + else if (name == "debug_addr") + AddrSection = data; + // Any more debug info sections go here. + else + continue; + + // TODO: Add support for relocations in other sections as needed. + // Record relocations for the debug_info and debug_line sections. + RelocAddrMap *Map; + if (name == "debug_info") + Map = &InfoRelocMap; + else if (name == "debug_info.dwo") + Map = &InfoDWORelocMap; + else if (name == "debug_line") + Map = &LineRelocMap; + else + continue; + + if (i->begin_relocations() != i->end_relocations()) { + uint64_t SectionSize; + i->getSize(SectionSize); + for (object::relocation_iterator reloc_i = i->begin_relocations(), + reloc_e = i->end_relocations(); + reloc_i != reloc_e; reloc_i.increment(ec)) { + uint64_t Address; + reloc_i->getAddress(Address); + uint64_t Type; + reloc_i->getType(Type); + uint64_t SymAddr = 0; + // ELF relocations may need the symbol address + if (Obj->isELF()) { + object::SymbolRef Sym; + reloc_i->getSymbol(Sym); + Sym.getAddress(SymAddr); + } + + object::RelocVisitor V(Obj->getFileFormatName()); + // The section address is always 0 for debug sections. + object::RelocToApply R(V.visit(Type, *reloc_i, 0, SymAddr)); + if (V.error()) { + SmallString<32> Name; + error_code ec(reloc_i->getTypeName(Name)); + if (ec) { + errs() << "Aaaaaa! Nameless relocation! 
Aaaaaa!\n"; + } + errs() << "error: failed to compute relocation: " + << Name << "\n"; + continue; + } + + if (Address + R.Width > SectionSize) { + errs() << "error: " << R.Width << "-byte relocation starting " + << Address << " bytes into section " << name << " which is " + << SectionSize << " bytes long.\n"; + continue; + } + if (R.Width > 8) { + errs() << "error: can't handle a relocation of more than 8 bytes at " + "a time.\n"; + continue; + } + DEBUG(dbgs() << "Writing " << format("%p", R.Value) + << " at " << format("%p", Address) + << " with width " << format("%d", R.Width) + << "\n"); + Map->insert(std::make_pair(Address, std::make_pair(R.Width, R.Value))); + } + } + } +} + void DWARFContextInMemory::anchor() { } diff --git a/lib/DebugInfo/DWARFContext.h b/lib/DebugInfo/DWARFContext.h index 4001792b3d5f..37b272993f37 100644 --- a/lib/DebugInfo/DWARFContext.h +++ b/lib/DebugInfo/DWARFContext.h @@ -12,11 +12,12 @@ #include "DWARFCompileUnit.h" #include "DWARFDebugAranges.h" +#include "DWARFDebugFrame.h" #include "DWARFDebugLine.h" #include "DWARFDebugRangeList.h" -#include "llvm/DebugInfo/DIContext.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/DebugInfo/DIContext.h" namespace llvm { @@ -25,24 +26,28 @@ namespace llvm { /// information parsing. The actual data is supplied through pure virtual /// methods that a concrete implementation provides. class DWARFContext : public DIContext { - bool IsLittleEndian; - const RelocAddrMap &RelocMap; - SmallVector CUs; OwningPtr Abbrev; OwningPtr Aranges; OwningPtr Line; + OwningPtr DebugFrame; + + SmallVector DWOCUs; + OwningPtr AbbrevDWO; DWARFContext(DWARFContext &) LLVM_DELETED_FUNCTION; DWARFContext &operator=(DWARFContext &) LLVM_DELETED_FUNCTION; /// Read compile units from the debug_info section and store them in CUs. void parseCompileUnits(); -protected: - DWARFContext(bool isLittleEndian, const RelocAddrMap &Map) : - IsLittleEndian(isLittleEndian), RelocMap(Map) {} + + /// Read compile units from the debug_info.dwo section and store them in + /// DWOCUs. + void parseDWOCompileUnits(); + public: - virtual void dump(raw_ostream &OS); + DWARFContext() {} + virtual void dump(raw_ostream &OS, DIDumpType DumpType = DIDT_All); /// Get the number of compile units in this context. unsigned getNumCompileUnits() { @@ -50,6 +55,14 @@ public: parseCompileUnits(); return CUs.size(); } + + /// Get the number of compile units in the DWO context. + unsigned getNumDWOCompileUnits() { + if (DWOCUs.empty()) + parseDWOCompileUnits(); + return DWOCUs.size(); + } + /// Get the compile unit at the specified index for this compile unit. DWARFCompileUnit *getCompileUnitAtIndex(unsigned index) { if (CUs.empty()) @@ -57,30 +70,57 @@ public: return &CUs[index]; } + /// Get the compile unit at the specified index for the DWO compile units. + DWARFCompileUnit *getDWOCompileUnitAtIndex(unsigned index) { + if (DWOCUs.empty()) + parseDWOCompileUnits(); + return &DWOCUs[index]; + } + /// Get a pointer to the parsed DebugAbbrev object. const DWARFDebugAbbrev *getDebugAbbrev(); + /// Get a pointer to the parsed dwo abbreviations object. + const DWARFDebugAbbrev *getDebugAbbrevDWO(); + /// Get a pointer to the parsed DebugAranges object. const DWARFDebugAranges *getDebugAranges(); + /// Get a pointer to the parsed frame information object. + const DWARFDebugFrame *getDebugFrame(); + /// Get a pointer to a parsed line table corresponding to a compile unit. 
const DWARFDebugLine::LineTable * getLineTableForCompileUnit(DWARFCompileUnit *cu); virtual DILineInfo getLineInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()); + virtual DILineInfoTable getLineInfoForAddressRange(uint64_t Address, + uint64_t Size, DILineInfoSpecifier Specifier = DILineInfoSpecifier()); virtual DIInliningInfo getInliningInfoForAddress(uint64_t Address, DILineInfoSpecifier Specifier = DILineInfoSpecifier()); - bool isLittleEndian() const { return IsLittleEndian; } - const RelocAddrMap &relocMap() const { return RelocMap; } - + virtual bool isLittleEndian() const = 0; + virtual uint8_t getAddressSize() const = 0; + virtual const RelocAddrMap &infoRelocMap() const = 0; + virtual const RelocAddrMap &lineRelocMap() const = 0; virtual StringRef getInfoSection() = 0; virtual StringRef getAbbrevSection() = 0; virtual StringRef getARangeSection() = 0; + virtual StringRef getDebugFrameSection() = 0; virtual StringRef getLineSection() = 0; virtual StringRef getStringSection() = 0; virtual StringRef getRangeSection() = 0; + virtual StringRef getPubNamesSection() = 0; + + // Sections for DWARF5 split dwarf proposal. + virtual StringRef getInfoDWOSection() = 0; + virtual StringRef getAbbrevDWOSection() = 0; + virtual StringRef getStringDWOSection() = 0; + virtual StringRef getStringOffsetDWOSection() = 0; + virtual StringRef getRangeDWOSection() = 0; + virtual StringRef getAddrSection() = 0; + virtual const RelocAddrMap &infoDWORelocMap() const = 0; static bool isSupportedVersion(unsigned version) { return version == 2 || version == 3; @@ -99,36 +139,57 @@ private: /// pointers to it. class DWARFContextInMemory : public DWARFContext { virtual void anchor(); + bool IsLittleEndian; + uint8_t AddressSize; + RelocAddrMap InfoRelocMap; + RelocAddrMap LineRelocMap; StringRef InfoSection; StringRef AbbrevSection; StringRef ARangeSection; + StringRef DebugFrameSection; StringRef LineSection; StringRef StringSection; StringRef RangeSection; -public: - DWARFContextInMemory(bool isLittleEndian, - StringRef infoSection, - StringRef abbrevSection, - StringRef aRangeSection, - StringRef lineSection, - StringRef stringSection, - StringRef rangeSection, - const RelocAddrMap &Map = RelocAddrMap()) - : DWARFContext(isLittleEndian, Map), - InfoSection(infoSection), - AbbrevSection(abbrevSection), - ARangeSection(aRangeSection), - LineSection(lineSection), - StringSection(stringSection), - RangeSection(rangeSection) - {} + StringRef PubNamesSection; + // Sections for DWARF5 split dwarf proposal. 
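These new members are filled in by the DWARFContextInMemory(ObjectFile*) constructor shown earlier in this patch, which strips the leading '.'/'_' run from each section name and files the contents under the canonical DWARF name. A standalone sketch of that normalization and routing, using std::string and std::map rather than StringRef members; the helper names are illustrative only.

#include <map>
#include <string>

// Object-file section names arrive as ".debug_info", "__debug_info", etc.;
// drop the leading '.' and '_' characters to get the canonical DWARF name.
std::string canonicalSectionName(const std::string &Name) {
  std::string::size_type Pos = Name.find_first_not_of("._");
  return Pos == std::string::npos ? std::string() : Name.substr(Pos);
}

struct SectionData {
  std::map<std::string, std::string> Sections;   // canonical name -> bytes

  void addSection(const std::string &RawName, const std::string &Contents) {
    std::string Name = canonicalSectionName(RawName);
    // Only names the reader understands are kept; anything else is ignored,
    // matching the trailing "else continue" in the constructor.
    if (Name == "debug_info" || Name == "debug_abbrev" || Name == "debug_line" ||
        Name == "debug_str" || Name == "debug_ranges" || Name == "debug_frame" ||
        Name == "debug_aranges" || Name == "debug_pubnames" || Name == "debug_addr" ||
        Name == "debug_info.dwo" || Name == "debug_abbrev.dwo" ||
        Name == "debug_str.dwo" || Name == "debug_str_offsets.dwo")
      Sections[Name] = Contents;
  }
};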
+ RelocAddrMap InfoDWORelocMap; + StringRef InfoDWOSection; + StringRef AbbrevDWOSection; + StringRef StringDWOSection; + StringRef StringOffsetDWOSection; + StringRef RangeDWOSection; + StringRef AddrSection; + +public: + DWARFContextInMemory(object::ObjectFile *); + virtual bool isLittleEndian() const { return IsLittleEndian; } + virtual uint8_t getAddressSize() const { return AddressSize; } + virtual const RelocAddrMap &infoRelocMap() const { return InfoRelocMap; } + virtual const RelocAddrMap &lineRelocMap() const { return LineRelocMap; } virtual StringRef getInfoSection() { return InfoSection; } virtual StringRef getAbbrevSection() { return AbbrevSection; } virtual StringRef getARangeSection() { return ARangeSection; } + virtual StringRef getDebugFrameSection() { return DebugFrameSection; } virtual StringRef getLineSection() { return LineSection; } virtual StringRef getStringSection() { return StringSection; } virtual StringRef getRangeSection() { return RangeSection; } + virtual StringRef getPubNamesSection() { return PubNamesSection; } + + // Sections for DWARF5 split dwarf proposal. + virtual StringRef getInfoDWOSection() { return InfoDWOSection; } + virtual StringRef getAbbrevDWOSection() { return AbbrevDWOSection; } + virtual StringRef getStringDWOSection() { return StringDWOSection; } + virtual StringRef getStringOffsetDWOSection() { + return StringOffsetDWOSection; + } + virtual StringRef getRangeDWOSection() { return RangeDWOSection; } + virtual StringRef getAddrSection() { + return AddrSection; + } + virtual const RelocAddrMap &infoDWORelocMap() const { + return InfoDWORelocMap; + } }; } diff --git a/lib/DebugInfo/DWARFDebugArangeSet.cpp b/lib/DebugInfo/DWARFDebugArangeSet.cpp index 2efbfd1f92fb..7dff9ff49a62 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.cpp +++ b/lib/DebugInfo/DWARFDebugArangeSet.cpp @@ -16,7 +16,7 @@ using namespace llvm; void DWARFDebugArangeSet::clear() { Offset = -1U; - std::memset(&Header, 0, sizeof(Header)); + std::memset(&HeaderData, 0, sizeof(Header)); ArangeDescriptors.clear(); } @@ -66,15 +66,15 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { // descriptor on the target system. This header is followed by a series // of tuples. Each tuple consists of an address and a length, each in // the size appropriate for an address on the target architecture. - Header.Length = data.getU32(offset_ptr); - Header.Version = data.getU16(offset_ptr); - Header.CuOffset = data.getU32(offset_ptr); - Header.AddrSize = data.getU8(offset_ptr); - Header.SegSize = data.getU8(offset_ptr); + HeaderData.Length = data.getU32(offset_ptr); + HeaderData.Version = data.getU16(offset_ptr); + HeaderData.CuOffset = data.getU32(offset_ptr); + HeaderData.AddrSize = data.getU8(offset_ptr); + HeaderData.SegSize = data.getU8(offset_ptr); // Perform basic validation of the header fields. - if (!data.isValidOffsetForDataOfSize(Offset, Header.Length) || - (Header.AddrSize != 4 && Header.AddrSize != 8)) { + if (!data.isValidOffsetForDataOfSize(Offset, HeaderData.Length) || + (HeaderData.AddrSize != 4 && HeaderData.AddrSize != 8)) { clear(); return false; } @@ -84,7 +84,7 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { // size of an address). The header is padded, if necessary, to the // appropriate boundary. 
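The padding comment above corresponds to a small piece of arithmetic: the first (address, length) tuple must begin at a multiple of the tuple size, which is twice the address size. This constexpr sketch computes the same offset as the while-loop that follows, with the usual 12-byte header as a worked example; it is an illustration, not the extractor's code.

#include <cstdint>

// Round the first-tuple offset up to the next multiple of the tuple size
// (2 * address size).  E.g. a 12-byte arange header with 8-byte addresses
// puts the first tuple at offset 16.
constexpr uint32_t firstTupleOffset(uint32_t HeaderSize, uint32_t AddrSize) {
  const uint32_t TupleSize = AddrSize * 2;
  return ((HeaderSize + TupleSize - 1) / TupleSize) * TupleSize;
}

static_assert(firstTupleOffset(12, 8) == 16, "padding example");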
const uint32_t header_size = *offset_ptr - Offset; - const uint32_t tuple_size = Header.AddrSize * 2; + const uint32_t tuple_size = HeaderData.AddrSize * 2; uint32_t first_tuple_offset = 0; while (first_tuple_offset < header_size) first_tuple_offset += tuple_size; @@ -94,11 +94,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { Descriptor arangeDescriptor; assert(sizeof(arangeDescriptor.Address) == sizeof(arangeDescriptor.Length)); - assert(sizeof(arangeDescriptor.Address) >= Header.AddrSize); + assert(sizeof(arangeDescriptor.Address) >= HeaderData.AddrSize); while (data.isValidOffset(*offset_ptr)) { - arangeDescriptor.Address = data.getUnsigned(offset_ptr, Header.AddrSize); - arangeDescriptor.Length = data.getUnsigned(offset_ptr, Header.AddrSize); + arangeDescriptor.Address = data.getUnsigned(offset_ptr, HeaderData.AddrSize); + arangeDescriptor.Length = data.getUnsigned(offset_ptr, HeaderData.AddrSize); // Each set of tuples is terminated by a 0 for the address and 0 // for the length. @@ -115,11 +115,11 @@ DWARFDebugArangeSet::extract(DataExtractor data, uint32_t *offset_ptr) { void DWARFDebugArangeSet::dump(raw_ostream &OS) const { OS << format("Address Range Header: length = 0x%8.8x, version = 0x%4.4x, ", - Header.Length, Header.Version) + HeaderData.Length, HeaderData.Version) << format("cu_offset = 0x%8.8x, addr_size = 0x%2.2x, seg_size = 0x%2.2x\n", - Header.CuOffset, Header.AddrSize, Header.SegSize); + HeaderData.CuOffset, HeaderData.AddrSize, HeaderData.SegSize); - const uint32_t hex_width = Header.AddrSize * 2; + const uint32_t hex_width = HeaderData.AddrSize * 2; for (DescriptorConstIter pos = ArangeDescriptors.begin(), end = ArangeDescriptors.end(); pos != end; ++pos) OS << format("[0x%*.*" PRIx64 " -", hex_width, hex_width, pos->Address) @@ -145,7 +145,7 @@ uint32_t DWARFDebugArangeSet::findAddress(uint64_t address) const { std::find_if(ArangeDescriptors.begin(), end, // Range DescriptorContainsAddress(address)); // Predicate if (pos != end) - return Header.CuOffset; + return HeaderData.CuOffset; return -1U; } diff --git a/lib/DebugInfo/DWARFDebugArangeSet.h b/lib/DebugInfo/DWARFDebugArangeSet.h index 9a2a6d0f0037..d76867615aa1 100644 --- a/lib/DebugInfo/DWARFDebugArangeSet.h +++ b/lib/DebugInfo/DWARFDebugArangeSet.h @@ -48,7 +48,7 @@ private: typedef DescriptorColl::const_iterator DescriptorConstIter; uint32_t Offset; - Header Header; + Header HeaderData; DescriptorColl ArangeDescriptors; public: @@ -58,11 +58,11 @@ public: bool extract(DataExtractor data, uint32_t *offset_ptr); void dump(raw_ostream &OS) const; - uint32_t getCompileUnitDIEOffset() const { return Header.CuOffset; } - uint32_t getOffsetOfNextEntry() const { return Offset + Header.Length + 4; } + uint32_t getCompileUnitDIEOffset() const { return HeaderData.CuOffset; } + uint32_t getOffsetOfNextEntry() const { return Offset + HeaderData.Length + 4; } uint32_t findAddress(uint64_t address) const; uint32_t getNumDescriptors() const { return ArangeDescriptors.size(); } - const struct Header &getHeader() const { return Header; } + const struct Header &getHeader() const { return HeaderData; } const Descriptor *getDescriptor(uint32_t i) const { if (i < ArangeDescriptors.size()) return &ArangeDescriptors[i]; diff --git a/lib/DebugInfo/DWARFDebugAranges.cpp b/lib/DebugInfo/DWARFDebugAranges.cpp index f9a34c908f1d..f79862d606f5 100644 --- a/lib/DebugInfo/DWARFDebugAranges.cpp +++ b/lib/DebugInfo/DWARFDebugAranges.cpp @@ -26,34 +26,40 @@ namespace { class CountArangeDescriptors { public: 
CountArangeDescriptors(uint32_t &count_ref) : Count(count_ref) {} - void operator()(const DWARFDebugArangeSet &set) { - Count += set.getNumDescriptors(); + void operator()(const DWARFDebugArangeSet &Set) { + Count += Set.getNumDescriptors(); } uint32_t &Count; }; class AddArangeDescriptors { public: - AddArangeDescriptors(DWARFDebugAranges::RangeColl &ranges) - : RangeCollection(ranges) {} - void operator()(const DWARFDebugArangeSet& set) { - const DWARFDebugArangeSet::Descriptor* arange_desc_ptr; - DWARFDebugAranges::Range range; - range.Offset = set.getCompileUnitDIEOffset(); - - for (uint32_t i=0; (arange_desc_ptr = set.getDescriptor(i)) != NULL; ++i){ - range.LoPC = arange_desc_ptr->Address; - range.Length = arange_desc_ptr->Length; + AddArangeDescriptors(DWARFDebugAranges::RangeColl &Ranges, + DWARFDebugAranges::ParsedCUOffsetColl &CUOffsets) + : RangeCollection(Ranges), + CUOffsetCollection(CUOffsets) {} + void operator()(const DWARFDebugArangeSet &Set) { + DWARFDebugAranges::Range Range; + Range.Offset = Set.getCompileUnitDIEOffset(); + CUOffsetCollection.insert(Range.Offset); + + for (uint32_t i = 0, n = Set.getNumDescriptors(); i < n; ++i) { + const DWARFDebugArangeSet::Descriptor *ArangeDescPtr = + Set.getDescriptor(i); + Range.LoPC = ArangeDescPtr->Address; + Range.Length = ArangeDescPtr->Length; // Insert each item in increasing address order so binary searching // can later be done! - DWARFDebugAranges::RangeColl::iterator insert_pos = + DWARFDebugAranges::RangeColl::iterator InsertPos = std::lower_bound(RangeCollection.begin(), RangeCollection.end(), - range, RangeLessThan); - RangeCollection.insert(insert_pos, range); + Range, RangeLessThan); + RangeCollection.insert(InsertPos, Range); } + } - DWARFDebugAranges::RangeColl& RangeCollection; + DWARFDebugAranges::RangeColl &RangeCollection; + DWARFDebugAranges::ParsedCUOffsetColl &CUOffsetCollection; }; } @@ -75,7 +81,7 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) { if (count > 0) { Aranges.reserve(count); - AddArangeDescriptors range_adder(Aranges); + AddArangeDescriptors range_adder(Aranges, ParsedCUOffsets); std::for_each(sets.begin(), sets.end(), range_adder); } } @@ -83,13 +89,14 @@ bool DWARFDebugAranges::extract(DataExtractor debug_aranges_data) { } bool DWARFDebugAranges::generate(DWARFContext *ctx) { - clear(); if (ctx) { const uint32_t num_compile_units = ctx->getNumCompileUnits(); for (uint32_t cu_idx = 0; cu_idx < num_compile_units; ++cu_idx) { - DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx); - if (cu) - cu->buildAddressRangeTable(this, true); + if (DWARFCompileUnit *cu = ctx->getCompileUnitAtIndex(cu_idx)) { + uint32_t CUOffset = cu->getOffset(); + if (ParsedCUOffsets.insert(CUOffset).second) + cu->buildAddressRangeTable(this, true); + } } } sort(true, /* overlap size */ 0); @@ -179,7 +186,7 @@ uint32_t DWARFDebugAranges::findAddress(uint64_t address) const { Range range(address); RangeCollIterator begin = Aranges.begin(); RangeCollIterator end = Aranges.end(); - RangeCollIterator pos = lower_bound(begin, end, range, RangeLessThan); + RangeCollIterator pos = std::lower_bound(begin, end, range, RangeLessThan); if (pos != end && pos->LoPC <= address && address < pos->HiPC()) { return pos->Offset; diff --git a/lib/DebugInfo/DWARFDebugAranges.h b/lib/DebugInfo/DWARFDebugAranges.h index 12afb60beb40..1509ffad41f1 100644 --- a/lib/DebugInfo/DWARFDebugAranges.h +++ b/lib/DebugInfo/DWARFDebugAranges.h @@ -11,6 +11,7 @@ #define LLVM_DEBUGINFO_DWARFDEBUGARANGES_H #include 
"DWARFDebugArangeSet.h" +#include "llvm/ADT/DenseSet.h" #include namespace llvm { @@ -60,7 +61,10 @@ public: uint32_t Offset; // Offset of the compile unit or die }; - void clear() { Aranges.clear(); } + void clear() { + Aranges.clear(); + ParsedCUOffsets.clear(); + } bool allRangesAreContiguous(uint64_t& LoPC, uint64_t& HiPC) const; bool getMaxRange(uint64_t& LoPC, uint64_t& HiPC) const; bool extract(DataExtractor debug_aranges_data); @@ -88,9 +92,11 @@ public: typedef std::vector RangeColl; typedef RangeColl::const_iterator RangeCollIterator; + typedef DenseSet ParsedCUOffsetColl; private: RangeColl Aranges; + ParsedCUOffsetColl ParsedCUOffsets; }; } diff --git a/lib/DebugInfo/DWARFDebugFrame.cpp b/lib/DebugInfo/DWARFDebugFrame.cpp new file mode 100644 index 000000000000..3efe6a1ebd30 --- /dev/null +++ b/lib/DebugInfo/DWARFDebugFrame.cpp @@ -0,0 +1,391 @@ +//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "DWARFDebugFrame.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/Support/DataTypes.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/Format.h" +#include "llvm/Support/raw_ostream.h" +#include +#include + +using namespace llvm; +using namespace dwarf; + + +/// \brief Abstract frame entry defining the common interface concrete +/// entries implement. +class llvm::FrameEntry { +public: + enum FrameKind {FK_CIE, FK_FDE}; + FrameEntry(FrameKind K, DataExtractor D, uint64_t Offset, uint64_t Length) + : Kind(K), Data(D), Offset(Offset), Length(Length) {} + + virtual ~FrameEntry() { + } + + FrameKind getKind() const { return Kind; } + virtual uint64_t getOffset() const { return Offset; } + + /// \brief Parse and store a sequence of CFI instructions from our data + /// stream, starting at *Offset and ending at EndOffset. If everything + /// goes well, *Offset should be equal to EndOffset when this method + /// returns. Otherwise, an error occurred. + virtual void parseInstructions(uint32_t *Offset, uint32_t EndOffset); + + /// \brief Dump the entry header to the given output stream. + virtual void dumpHeader(raw_ostream &OS) const = 0; + + /// \brief Dump the entry's instructions to the given output stream. + virtual void dumpInstructions(raw_ostream &OS) const; + +protected: + const FrameKind Kind; + + /// \brief The data stream holding the section from which the entry was + /// parsed. + DataExtractor Data; + + /// \brief Offset of this entry in the section. + uint64_t Offset; + + /// \brief Entry length as specified in DWARF. + uint64_t Length; + + /// An entry may contain CFI instructions. An instruction consists of an + /// opcode and an optional sequence of operands. + typedef std::vector Operands; + struct Instruction { + Instruction(uint8_t Opcode) + : Opcode(Opcode) + {} + + uint8_t Opcode; + Operands Ops; + }; + + std::vector Instructions; + + /// Convenience methods to add a new instruction with the given opcode and + /// operands to the Instructions vector. 
+ void addInstruction(uint8_t Opcode) { + Instructions.push_back(Instruction(Opcode)); + } + + void addInstruction(uint8_t Opcode, uint64_t Operand1) { + Instructions.push_back(Instruction(Opcode)); + Instructions.back().Ops.push_back(Operand1); + } + + void addInstruction(uint8_t Opcode, uint64_t Operand1, uint64_t Operand2) { + Instructions.push_back(Instruction(Opcode)); + Instructions.back().Ops.push_back(Operand1); + Instructions.back().Ops.push_back(Operand2); + } +}; + + +// See DWARF standard v3, section 7.23 +const uint8_t DWARF_CFI_PRIMARY_OPCODE_MASK = 0xc0; +const uint8_t DWARF_CFI_PRIMARY_OPERAND_MASK = 0x3f; + + +void FrameEntry::parseInstructions(uint32_t *Offset, uint32_t EndOffset) { + while (*Offset < EndOffset) { + uint8_t Opcode = Data.getU8(Offset); + // Some instructions have a primary opcode encoded in the top bits. + uint8_t Primary = Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK; + + if (Primary) { + // If it's a primary opcode, the first operand is encoded in the bottom + // bits of the opcode itself. + uint64_t Op1 = Opcode & DWARF_CFI_PRIMARY_OPERAND_MASK; + switch (Primary) { + default: llvm_unreachable("Impossible primary CFI opcode"); + case DW_CFA_advance_loc: + case DW_CFA_restore: + addInstruction(Primary, Op1); + break; + case DW_CFA_offset: + addInstruction(Primary, Op1, Data.getULEB128(Offset)); + break; + } + } else { + // Extended opcode - its value is Opcode itself. + switch (Opcode) { + default: llvm_unreachable("Invalid extended CFI opcode"); + case DW_CFA_nop: + case DW_CFA_remember_state: + case DW_CFA_restore_state: + // No operands + addInstruction(Opcode); + break; + case DW_CFA_set_loc: + // Operands: Address + addInstruction(Opcode, Data.getAddress(Offset)); + break; + case DW_CFA_advance_loc1: + // Operands: 1-byte delta + addInstruction(Opcode, Data.getU8(Offset)); + break; + case DW_CFA_advance_loc2: + // Operands: 2-byte delta + addInstruction(Opcode, Data.getU16(Offset)); + break; + case DW_CFA_advance_loc4: + // Operands: 4-byte delta + addInstruction(Opcode, Data.getU32(Offset)); + break; + case DW_CFA_restore_extended: + case DW_CFA_undefined: + case DW_CFA_same_value: + case DW_CFA_def_cfa_register: + case DW_CFA_def_cfa_offset: + // Operands: ULEB128 + addInstruction(Opcode, Data.getULEB128(Offset)); + break; + case DW_CFA_def_cfa_offset_sf: + // Operands: SLEB128 + addInstruction(Opcode, Data.getSLEB128(Offset)); + break; + case DW_CFA_offset_extended: + case DW_CFA_register: + case DW_CFA_def_cfa: + case DW_CFA_val_offset: + // Operands: ULEB128, ULEB128 + addInstruction(Opcode, Data.getULEB128(Offset), + Data.getULEB128(Offset)); + break; + case DW_CFA_offset_extended_sf: + case DW_CFA_def_cfa_sf: + case DW_CFA_val_offset_sf: + // Operands: ULEB128, SLEB128 + addInstruction(Opcode, Data.getULEB128(Offset), + Data.getSLEB128(Offset)); + break; + case DW_CFA_def_cfa_expression: + case DW_CFA_expression: + case DW_CFA_val_expression: + // TODO: implement this + report_fatal_error("Values with expressions not implemented yet!"); + } + } + } +} + + +void FrameEntry::dumpInstructions(raw_ostream &OS) const { + // TODO: at the moment only instruction names are dumped. Expand this to + // dump operands as well. 
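The primary/extended opcode split used by parseInstructions is easiest to see on a concrete byte. This is a stand-alone illustrative sketch: the two masks mirror DWARF_CFI_PRIMARY_OPCODE_MASK and DWARF_CFI_PRIMARY_OPERAND_MASK above, and 0x40 is the DW_CFA_advance_loc primary opcode.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint8_t PrimaryOpcodeMask  = 0xc0; // top two bits pick the operation
      const uint8_t PrimaryOperandMask = 0x3f; // low six bits carry operand 1
      const uint8_t Byte = 0x44;               // DW_CFA_advance_loc with delta 4
      assert((Byte & PrimaryOpcodeMask) == 0x40);  // decodes as DW_CFA_advance_loc
      assert((Byte & PrimaryOperandMask) == 0x04); // advance the location by 4 units
      return 0;
    }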
+ for (std::vector::const_iterator I = Instructions.begin(), + E = Instructions.end(); + I != E; ++I) { + uint8_t Opcode = I->Opcode; + if (Opcode & DWARF_CFI_PRIMARY_OPCODE_MASK) + Opcode &= DWARF_CFI_PRIMARY_OPCODE_MASK; + OS << " " << CallFrameString(Opcode) << ":\n"; + } +} + + +namespace { +/// \brief DWARF Common Information Entry (CIE) +class CIE : public FrameEntry { +public: + // CIEs (and FDEs) are simply container classes, so the only sensible way to + // create them is by providing the full parsed contents in the constructor. + CIE(DataExtractor D, uint64_t Offset, uint64_t Length, uint8_t Version, + SmallString<8> Augmentation, uint64_t CodeAlignmentFactor, + int64_t DataAlignmentFactor, uint64_t ReturnAddressRegister) + : FrameEntry(FK_CIE, D, Offset, Length), Version(Version), + Augmentation(Augmentation), CodeAlignmentFactor(CodeAlignmentFactor), + DataAlignmentFactor(DataAlignmentFactor), + ReturnAddressRegister(ReturnAddressRegister) {} + + ~CIE() { + } + + void dumpHeader(raw_ostream &OS) const { + OS << format("%08x %08x %08x CIE", + (uint32_t)Offset, (uint32_t)Length, DW_CIE_ID) + << "\n"; + OS << format(" Version: %d\n", Version); + OS << " Augmentation: \"" << Augmentation << "\"\n"; + OS << format(" Code alignment factor: %u\n", + (uint32_t)CodeAlignmentFactor); + OS << format(" Data alignment factor: %d\n", + (int32_t)DataAlignmentFactor); + OS << format(" Return address column: %d\n", + (int32_t)ReturnAddressRegister); + OS << "\n"; + } + + static bool classof(const FrameEntry *FE) { + return FE->getKind() == FK_CIE; + } + +private: + /// The following fields are defined in section 6.4.1 of the DWARF standard v3 + uint8_t Version; + SmallString<8> Augmentation; + uint64_t CodeAlignmentFactor; + int64_t DataAlignmentFactor; + uint64_t ReturnAddressRegister; +}; + + +/// \brief DWARF Frame Description Entry (FDE) +class FDE : public FrameEntry { +public: + // Each FDE has a CIE it's "linked to". Our FDE contains is constructed with + // an offset to the CIE (provided by parsing the FDE header). The CIE itself + // is obtained lazily once it's actually required. 
+ FDE(DataExtractor D, uint64_t Offset, uint64_t Length, + int64_t LinkedCIEOffset, uint64_t InitialLocation, uint64_t AddressRange) + : FrameEntry(FK_FDE, D, Offset, Length), LinkedCIEOffset(LinkedCIEOffset), + InitialLocation(InitialLocation), AddressRange(AddressRange), + LinkedCIE(NULL) {} + + ~FDE() { + } + + void dumpHeader(raw_ostream &OS) const { + OS << format("%08x %08x %08x FDE ", + (uint32_t)Offset, (uint32_t)Length, (int32_t)LinkedCIEOffset); + OS << format("cie=%08x pc=%08x...%08x\n", + (int32_t)LinkedCIEOffset, + (uint32_t)InitialLocation, + (uint32_t)InitialLocation + (uint32_t)AddressRange); + if (LinkedCIE) { + OS << format("%p\n", LinkedCIE); + } + } + + static bool classof(const FrameEntry *FE) { + return FE->getKind() == FK_FDE; + } +private: + + /// The following fields are defined in section 6.4.1 of the DWARF standard v3 + uint64_t LinkedCIEOffset; + uint64_t InitialLocation; + uint64_t AddressRange; + CIE *LinkedCIE; +}; +} // end anonymous namespace + + +DWARFDebugFrame::DWARFDebugFrame() { +} + + +DWARFDebugFrame::~DWARFDebugFrame() { + for (EntryVector::iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + delete *I; + } +} + + +static void LLVM_ATTRIBUTE_UNUSED dumpDataAux(DataExtractor Data, + uint32_t Offset, int Length) { + errs() << "DUMP: "; + for (int i = 0; i < Length; ++i) { + uint8_t c = Data.getU8(&Offset); + errs().write_hex(c); errs() << " "; + } + errs() << "\n"; +} + + +void DWARFDebugFrame::parse(DataExtractor Data) { + uint32_t Offset = 0; + + while (Data.isValidOffset(Offset)) { + uint32_t StartOffset = Offset; + + bool IsDWARF64 = false; + uint64_t Length = Data.getU32(&Offset); + uint64_t Id; + + if (Length == UINT32_MAX) { + // DWARF-64 is distinguished by the first 32 bits of the initial length + // field being 0xffffffff. Then, the next 64 bits are the actual entry + // length. + IsDWARF64 = true; + Length = Data.getU64(&Offset); + } + + // At this point, Offset points to the next field after Length. + // Length is the structure size excluding itself. Compute an offset one + // past the end of the structure (needed to know how many instructions to + // read). + // TODO: For honest DWARF64 support, DataExtractor will have to treat + // offset_ptr as uint64_t* + uint32_t EndStructureOffset = Offset + static_cast(Length); + + // The Id field's size depends on the DWARF format + Id = Data.getUnsigned(&Offset, IsDWARF64 ? 8 : 4); + bool IsCIE = ((IsDWARF64 && Id == DW64_CIE_ID) || Id == DW_CIE_ID); + + FrameEntry *Entry = 0; + if (IsCIE) { + // Note: this is specifically DWARFv3 CIE header structure. It was + // changed in DWARFv4. We currently don't support reading DWARFv4 + // here because LLVM itself does not emit it (and LLDB doesn't + // support it either). 
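The initial-length handling above is the standard DWARF32/DWARF64 escape: a 32-bit length of 0xffffffff means a 64-bit length follows, and the entry ID then separates CIEs from FDEs. A small self-contained sketch of the DWARF32 case; the byte values are made up for illustration.

    #include "llvm/Support/DataExtractor.h"
    #include <cassert>

    void initialLengthSketch() {
      using namespace llvm;
      // length = 0x14, id = 0xffffffff (DW_CIE_ID), i.e. a DWARF32 CIE header.
      static const char Bytes[] = "\x14\x00\x00\x00\xff\xff\xff\xff";
      DataExtractor Data(StringRef(Bytes, 8), /*IsLittleEndian=*/true,
                         /*AddressSize=*/4);
      uint32_t Offset = 0;
      uint64_t Length = Data.getU32(&Offset);
      assert(Length != 0xffffffffU && "no DWARF64 escape, so this is DWARF32");
      uint64_t Id = Data.getUnsigned(&Offset, 4);
      assert(Id == 0xffffffffU && "DW_CIE_ID marks the entry as a CIE");
      (void)Length; (void)Id;
    }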
+ uint8_t Version = Data.getU8(&Offset); + const char *Augmentation = Data.getCStr(&Offset); + uint64_t CodeAlignmentFactor = Data.getULEB128(&Offset); + int64_t DataAlignmentFactor = Data.getSLEB128(&Offset); + uint64_t ReturnAddressRegister = Data.getULEB128(&Offset); + + Entry = new CIE(Data, StartOffset, Length, Version, + StringRef(Augmentation), CodeAlignmentFactor, + DataAlignmentFactor, ReturnAddressRegister); + } else { + // FDE + uint64_t CIEPointer = Id; + uint64_t InitialLocation = Data.getAddress(&Offset); + uint64_t AddressRange = Data.getAddress(&Offset); + + Entry = new FDE(Data, StartOffset, Length, CIEPointer, + InitialLocation, AddressRange); + } + + assert(Entry && "Expected Entry to be populated with CIE or FDE"); + Entry->parseInstructions(&Offset, EndStructureOffset); + + if (Offset == EndStructureOffset) { + // Entry instructions parsed successfully. + Entries.push_back(Entry); + } else { + std::string Str; + raw_string_ostream OS(Str); + OS << format("Parsing entry instructions at %lx failed", + Entry->getOffset()); + report_fatal_error(Str); + } + } +} + + +void DWARFDebugFrame::dump(raw_ostream &OS) const { + OS << "\n"; + for (EntryVector::const_iterator I = Entries.begin(), E = Entries.end(); + I != E; ++I) { + FrameEntry *Entry = *I; + Entry->dumpHeader(OS); + Entry->dumpInstructions(OS); + OS << "\n"; + } +} + diff --git a/lib/DebugInfo/DWARFDebugFrame.h b/lib/DebugInfo/DWARFDebugFrame.h new file mode 100644 index 000000000000..48b8d63a5a64 --- /dev/null +++ b/lib/DebugInfo/DWARFDebugFrame.h @@ -0,0 +1,46 @@ +//===-- DWARFDebugFrame.h - Parsing of .debug_frame -------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFDEBUGFRAME_H +#define LLVM_DEBUGINFO_DWARFDEBUGFRAME_H + +#include "llvm/Support/DataExtractor.h" +#include "llvm/Support/raw_ostream.h" +#include <vector> + + +namespace llvm { + +class FrameEntry; + + +/// \brief A parsed .debug_frame section +/// +class DWARFDebugFrame { +public: + DWARFDebugFrame(); + ~DWARFDebugFrame(); + + /// \brief Dump the section data into the given stream. + void dump(raw_ostream &OS) const; + + /// \brief Parse the section from raw data. + /// data is assumed to be pointing to the beginning of the section. + void parse(DataExtractor Data); + +private: + typedef std::vector<FrameEntry *> EntryVector; + EntryVector Entries; +}; + + +} // namespace llvm + +#endif + diff --git a/lib/DebugInfo/DWARFDebugInfoEntry.cpp b/lib/DebugInfo/DWARFDebugInfoEntry.cpp index ab6746445388..02b15d69043f 100644 --- a/lib/DebugInfo/DWARFDebugInfoEntry.cpp +++ b/lib/DebugInfo/DWARFDebugInfoEntry.cpp @@ -12,6 +12,7 @@ #include "DWARFContext.h" #include "DWARFDebugAbbrev.h" #include "DWARFFormValue.h" +#include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/Format.h" #include "llvm/Support/raw_ostream.h" @@ -39,7 +40,7 @@ void DWARFDebugInfoEntryMinimal::dump(raw_ostream &OS, OS << format(" [%u] %c\n", abbrCode, AbbrevDecl->hasChildren() ? '*' : ' '); - // Dump all data in the .debug_info for the attributes + // Dump all data in the DIE for the attributes.
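A minimal usage sketch of the DWARFDebugFrame class declared above. Section is assumed to hold the raw .debug_frame bytes (for example the data returned by getDebugFrameSection()), and the endianness and address size would come from the owning context; dumpDebugFrame is an illustrative name.

    #include "DWARFDebugFrame.h"   // the lib/DebugInfo-local header added above
    #include "llvm/Support/DataExtractor.h"
    #include "llvm/Support/raw_ostream.h"

    void dumpDebugFrame(llvm::StringRef Section, bool IsLittleEndian,
                        uint8_t AddressSize) {
      llvm::DWARFDebugFrame Frame;
      // parse() walks the CIE/FDE entries; dump() prints headers and CFI opcodes.
      Frame.parse(llvm::DataExtractor(Section, IsLittleEndian, AddressSize));
      Frame.dump(llvm::outs());
    }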
const uint32_t numAttributes = AbbrevDecl->getNumAttributes(); for (uint32_t i = 0; i != numAttributes; ++i) { uint16_t attr = AbbrevDecl->getAttrByIndex(i); @@ -113,9 +114,14 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, uint32_t i; uint16_t form; for (i=0; i<numAttributes; ++i) { form = AbbrevDecl->getFormByIndex(i); - const uint8_t fixed_skip_size = fixed_form_sizes[form]; + // FIXME: Currently we're checking if this is less than the last + // entry in the fixed_form_sizes table, but this should be changed + // to use dynamic dispatch. + const uint8_t fixed_skip_size = (form < DW_FORM_ref_sig8) ? + fixed_form_sizes[form] : 0; if (fixed_skip_size) offset += fixed_skip_size; else { @@ -187,6 +193,8 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, case DW_FORM_sdata: case DW_FORM_udata: case DW_FORM_ref_udata: + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_addr_index: debug_info_data.getULEB128(&offset); break; @@ -195,11 +203,9 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, form = debug_info_data.getULEB128(&offset); break; + // FIXME: 64-bit for DWARF64 case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - debug_info_data.getU32(offset_ptr); - else - debug_info_data.getU64(offset_ptr); + debug_info_data.getU32(offset_ptr); break; default: @@ -207,7 +213,6 @@ bool DWARFDebugInfoEntryMinimal::extractFast(const DWARFCompileUnit *cu, return false; } offset += form_size; - } while (form_is_indirect); } } @@ -327,6 +332,8 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, case DW_FORM_sdata: case DW_FORM_udata: case DW_FORM_ref_udata: + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_addr_index: debug_info_data.getULEB128(&offset); break; @@ -335,11 +342,9 @@ DWARFDebugInfoEntryMinimal::extract(const DWARFCompileUnit *cu, form_is_indirect = true; break; + // FIXME: 64-bit for DWARF64. case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - debug_info_data.getU32(offset_ptr); - else - debug_info_data.getU64(offset_ptr); + debug_info_data.getU32(offset_ptr); break; default: @@ -417,8 +422,7 @@ DWARFDebugInfoEntryMinimal::getAttributeValueAsString( const { DWARFFormValue form_value; if (getAttributeValue(cu, attr, form_value)) { - DataExtractor stringExtractor(cu->getContext().getStringSection(), - false, 0); + DataExtractor stringExtractor(cu->getStringSection(), false, 0); return form_value.getAsCString(&stringExtractor); } return fail_value; diff --git a/lib/DebugInfo/DWARFDebugLine.cpp b/lib/DebugInfo/DWARFDebugLine.cpp index 267364adfaca..192381c6f7c6 100644 --- a/lib/DebugInfo/DWARFDebugLine.cpp +++ b/lib/DebugInfo/DWARFDebugLine.cpp @@ -155,7 +155,7 @@ DWARFDebugLine::getOrParseLineTable(DataExtractor debug_line_data, if (pos.second) { // Parse and cache the line table at this offset. State state; - if (!parseStatementTable(debug_line_data, &offset, state)) + if (!parseStatementTable(debug_line_data, RelocMap, &offset, state)) return 0; pos.first->second = state; } @@ -219,7 +219,8 @@ DWARFDebugLine::parsePrologue(DataExtractor debug_line_data, } bool -DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, +DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, + const RelocAddrMap *RMap, uint32_t *offset_ptr, State &state) { const uint32_t debug_line_offset = *offset_ptr; @@ -268,7 +269,15 @@ DWARFDebugLine::parseStatementTable(DataExtractor debug_line_data, // relocatable address. All of the other statement program opcodes // that affect the address register add a delta to it.
This instruction // stores a relocatable value into it instead. - state.Address = debug_line_data.getAddress(offset_ptr); + { + // If this address is in our relocation map, apply the relocation. + RelocAddrMap::const_iterator AI = RMap->find(*offset_ptr); + if (AI != RMap->end()) { + const std::pair &R = AI->second; + state.Address = debug_line_data.getAddress(offset_ptr) + R.second; + } else + state.Address = debug_line_data.getAddress(offset_ptr); + } break; case DW_LNE_define_file: @@ -515,6 +524,83 @@ DWARFDebugLine::LineTable::lookupAddress(uint64_t address) const { return index; } +bool +DWARFDebugLine::LineTable::lookupAddressRange(uint64_t address, + uint64_t size, + std::vector& result) const { + if (Sequences.empty()) + return false; + uint64_t end_addr = address + size; + // First, find an instruction sequence containing the given address. + DWARFDebugLine::Sequence sequence; + sequence.LowPC = address; + SequenceIter first_seq = Sequences.begin(); + SequenceIter last_seq = Sequences.end(); + SequenceIter seq_pos = std::lower_bound(first_seq, last_seq, sequence, + DWARFDebugLine::Sequence::orderByLowPC); + if (seq_pos == last_seq || seq_pos->LowPC != address) { + if (seq_pos == first_seq) + return false; + seq_pos--; + } + if (!seq_pos->containsPC(address)) + return false; + + SequenceIter start_pos = seq_pos; + + // Add the rows from the first sequence to the vector, starting with the + // index we just calculated + + while (seq_pos != last_seq && seq_pos->LowPC < end_addr) { + DWARFDebugLine::Sequence cur_seq = *seq_pos; + uint32_t first_row_index; + uint32_t last_row_index; + if (seq_pos == start_pos) { + // For the first sequence, we need to find which row in the sequence is the + // first in our range. Rows are stored in a vector, so we may use + // arithmetical operations with iterators. + DWARFDebugLine::Row row; + row.Address = address; + RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + cur_seq.LastRowIndex; + RowIter row_pos = std::upper_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + // The 'row_pos' iterator references the first row that is greater than + // our start address. Unless that's the first row, we want to start at + // the row before that. + first_row_index = cur_seq.FirstRowIndex + (row_pos - first_row); + if (row_pos != first_row) + --first_row_index; + } else + first_row_index = cur_seq.FirstRowIndex; + + // For the last sequence in our range, we need to figure out the last row in + // range. For all other sequences we can go to the end of the sequence. + if (cur_seq.HighPC > end_addr) { + DWARFDebugLine::Row row; + row.Address = end_addr; + RowIter first_row = Rows.begin() + cur_seq.FirstRowIndex; + RowIter last_row = Rows.begin() + cur_seq.LastRowIndex; + RowIter row_pos = std::upper_bound(first_row, last_row, row, + DWARFDebugLine::Row::orderByAddress); + // The 'row_pos' iterator references the first row that is greater than + // our end address. The row before that is the last row we want. + last_row_index = cur_seq.FirstRowIndex + (row_pos - first_row) - 1; + } else + // Contrary to what you might expect, DWARFDebugLine::SequenceLastRowIndex + // isn't a valid index within the current sequence. It's that plus one. 
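The DW_LNE_set_address handling above, like the DW_FORM_addr and DW_FORM_strp handling later in this patch, follows one idiom: look the current section offset up in a RelocAddrMap and add the recorded addend to whatever was extracted. A stand-alone sketch of that idiom; RelocMapT and readRelocated are illustrative names, and the (width, addend) value type matches the pairs used throughout the patch.

    #include "llvm/ADT/DenseMap.h"
    #include <utility>

    typedef llvm::DenseMap<uint64_t, std::pair<uint8_t, int64_t> > RelocMapT;

    uint64_t readRelocated(const RelocMapT &Map, uint64_t SectionOffset,
                           uint64_t RawValue) {
      RelocMapT::const_iterator AI = Map.find(SectionOffset);
      if (AI != Map.end())
        return RawValue + AI->second.second; // apply the relocation addend
      return RawValue;                       // no relocation recorded here
    }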
+ last_row_index = cur_seq.LastRowIndex - 1; + + for (uint32_t i = first_row_index; i <= last_row_index; ++i) { + result.push_back(i); + } + + ++seq_pos; + } + + return true; +} + bool DWARFDebugLine::LineTable::getFileNameByIndex(uint64_t FileIndex, bool NeedsAbsoluteFilePath, diff --git a/lib/DebugInfo/DWARFDebugLine.h b/lib/DebugInfo/DWARFDebugLine.h index 586dd7e8784f..2990756bd7c9 100644 --- a/lib/DebugInfo/DWARFDebugLine.h +++ b/lib/DebugInfo/DWARFDebugLine.h @@ -10,6 +10,7 @@ #ifndef LLVM_DEBUGINFO_DWARFDEBUGLINE_H #define LLVM_DEBUGINFO_DWARFDEBUGLINE_H +#include "DWARFRelocMap.h" #include "llvm/Support/DataExtractor.h" #include #include @@ -21,6 +22,7 @@ class raw_ostream; class DWARFDebugLine { public: + DWARFDebugLine(const RelocAddrMap* LineInfoRelocMap) : RelocMap(LineInfoRelocMap) {} struct FileNameEntry { FileNameEntry() : Name(0), DirIdx(0), ModTime(0), Length(0) {} @@ -176,6 +178,10 @@ public: // or -1 if there is no such row. uint32_t lookupAddress(uint64_t address) const; + bool lookupAddressRange(uint64_t address, + uint64_t size, + std::vector& result) const; + // Extracts filename by its index in filename table in prologue. // Returns true on success. bool getFileNameByIndex(uint64_t FileIndex, @@ -227,6 +233,7 @@ public: Prologue *prologue); /// Parse a single line table (prologue and all rows). static bool parseStatementTable(DataExtractor debug_line_data, + const RelocAddrMap *RMap, uint32_t *offset_ptr, State &state); const LineTable *getLineTable(uint32_t offset) const; @@ -238,6 +245,7 @@ private: typedef LineTableMapTy::iterator LineTableIter; typedef LineTableMapTy::const_iterator LineTableConstIter; + const RelocAddrMap *RelocMap; LineTableMapTy LineTableMap; }; diff --git a/lib/DebugInfo/DWARFFormValue.cpp b/lib/DebugInfo/DWARFFormValue.cpp index fea9fd7f7d34..9f807aac5fd4 100644 --- a/lib/DebugInfo/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARFFormValue.cpp @@ -72,7 +72,7 @@ static const uint8_t form_sizes_addr8[] = { 8, // 0x14 DW_FORM_ref8 0, // 0x15 DW_FORM_ref_udata 0, // 0x16 DW_FORM_indirect - 8, // 0x17 DW_FORM_sec_offset + 4, // 0x17 DW_FORM_sec_offset 0, // 0x18 DW_FORM_exprloc 0, // 0x19 DW_FORM_flag_present 8, // 0x20 DW_FORM_ref_sig8 @@ -101,15 +101,15 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, case DW_FORM_addr: case DW_FORM_ref_addr: { RelocAddrMap::const_iterator AI - = cu->getContext().relocMap().find(*offset_ptr); - if (AI != cu->getContext().relocMap().end()) { + = cu->getRelocMap()->find(*offset_ptr); + if (AI != cu->getRelocMap()->end()) { const std::pair &R = AI->second; - Value.uval = R.second; - *offset_ptr += R.first; + Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()) + + R.second; } else Value.uval = data.getUnsigned(offset_ptr, cu->getAddressByteSize()); - } break; + } case DW_FORM_exprloc: case DW_FORM_block: Value.uval = data.getULEB128(offset_ptr); @@ -149,11 +149,10 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, break; case DW_FORM_strp: { RelocAddrMap::const_iterator AI - = cu->getContext().relocMap().find(*offset_ptr); - if (AI != cu->getContext().relocMap().end()) { + = cu->getRelocMap()->find(*offset_ptr); + if (AI != cu->getRelocMap()->end()) { const std::pair &R = AI->second; - Value.uval = R.second; - *offset_ptr += R.first; + Value.uval = data.getU32(offset_ptr) + R.second; } else Value.uval = data.getU32(offset_ptr); break; @@ -174,10 +173,8 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, indirect = true; break; case 
DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - Value.uval = data.getU32(offset_ptr); - else - Value.uval = data.getU64(offset_ptr); + // FIXME: This is 64-bit for DWARF64. + Value.uval = data.getU32(offset_ptr); break; case DW_FORM_flag_present: Value.uval = 1; @@ -185,6 +182,12 @@ DWARFFormValue::extractValue(DataExtractor data, uint32_t *offset_ptr, case DW_FORM_ref_sig8: Value.uval = data.getU64(offset_ptr); break; + case DW_FORM_GNU_addr_index: + Value.uval = data.getULEB128(offset_ptr); + break; + case DW_FORM_GNU_str_index: + Value.uval = data.getULEB128(offset_ptr); + break; default: return false; } @@ -253,7 +256,7 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // 0 byte values - implied from the form. case DW_FORM_flag_present: return true; - + // 1 byte values case DW_FORM_data1: case DW_FORM_flag: @@ -286,6 +289,8 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, case DW_FORM_sdata: case DW_FORM_udata: case DW_FORM_ref_udata: + case DW_FORM_GNU_str_index: + case DW_FORM_GNU_addr_index: debug_info_data.getULEB128(offset_ptr); return true; @@ -294,14 +299,11 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, form = debug_info_data.getULEB128(offset_ptr); break; - // 4 for DWARF32, 8 for DWARF64. + // FIXME: 4 for DWARF32, 8 for DWARF64. case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - *offset_ptr += 4; - else - *offset_ptr += 8; + *offset_ptr += 4; return true; - + default: return false; } @@ -311,12 +313,23 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, void DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { - DataExtractor debug_str_data(cu->getContext().getStringSection(), true, 0); + DataExtractor debug_str_data(cu->getStringSection(), true, 0); + DataExtractor debug_str_offset_data(cu->getStringOffsetSection(), true, 0); uint64_t uvalue = getUnsigned(); bool cu_relative_offset = false; switch (Form) { case DW_FORM_addr: OS << format("0x%016" PRIx64, uvalue); break; + case DW_FORM_GNU_addr_index: { + StringRef AddrOffsetSec = cu->getAddrOffsetSection(); + OS << format(" indexed (%8.8x) address = ", (uint32_t)uvalue); + if (AddrOffsetSec.size() != 0) { + DataExtractor DA(AddrOffsetSec, true, cu->getAddressByteSize()); + OS << format("0x%016" PRIx64, getIndirectAddress(&DA, cu)); + } else + OS << ""; + break; + } case DW_FORM_flag_present: OS << "true"; break; case DW_FORM_flag: case DW_FORM_data1: OS << format("0x%02x", (uint8_t)uvalue); break; @@ -370,6 +383,17 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { } break; } + case DW_FORM_GNU_str_index: { + OS << format(" indexed (%8.8x) string = ", (uint32_t)uvalue); + const char *dbg_str = getIndirectCString(&debug_str_data, + &debug_str_offset_data); + if (dbg_str) { + OS << '"'; + OS.write_escaped(dbg_str); + OS << '"'; + } + break; + } case DW_FORM_ref_addr: OS << format("0x%016" PRIx64, uvalue); break; @@ -400,13 +424,11 @@ DWARFFormValue::dump(raw_ostream &OS, const DWARFCompileUnit *cu) const { OS << "DW_FORM_indirect"; break; + // Should be formatted to 64-bit for DWARF64. 
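The two GNU index forms added above carry only a ULEB128 index; the actual string or address lives in a side table, and the index is scaled to a byte offset before the read. A tiny sketch of that arithmetic, mirroring getIndirectCString (4-byte offset slots) and getIndirectAddress (address-sized slots) defined just below; the index and address-size values are illustrative.

    #include <cassert>
    #include <cstdint>

    int main() {
      const uint64_t StrIndex = 3, AddrIndex = 2;
      const uint8_t AddressByteSize = 8;
      // DW_FORM_GNU_str_index: slot i starts at i * 4 in the string offsets table.
      assert(StrIndex * 4 == 12);
      // DW_FORM_GNU_addr_index: slot i starts at i * AddressByteSize in .debug_addr.
      assert(AddrIndex * AddressByteSize == 16);
      return 0;
    }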
case DW_FORM_sec_offset: - if (cu->getAddressByteSize() == 4) - OS << format("0x%08x", (uint32_t)uvalue); - else - OS << format("0x%016" PRIx64, uvalue); + OS << format("0x%08x", (uint32_t)uvalue); break; - + default: OS << format("DW_FORM(0x%4.4x)", Form); break; @@ -427,6 +449,25 @@ DWARFFormValue::getAsCString(const DataExtractor *debug_str_data_ptr) const { return NULL; } +const char* +DWARFFormValue::getIndirectCString(const DataExtractor *DS, + const DataExtractor *DSO) const { + if (!DS || !DSO) return NULL; + + uint32_t offset = Value.uval * 4; + uint32_t soffset = DSO->getU32(&offset); + return DS->getCStr(&soffset); +} + +uint64_t +DWARFFormValue::getIndirectAddress(const DataExtractor *DA, + const DWARFCompileUnit *cu) const { + if (!DA) return 0; + + uint32_t offset = Value.uval * cu->getAddressByteSize(); + return DA->getAddress(&offset); +} + uint64_t DWARFFormValue::getReference(const DWARFCompileUnit *cu) const { uint64_t die_offset = Value.uval; switch (Form) { diff --git a/lib/DebugInfo/DWARFFormValue.h b/lib/DebugInfo/DWARFFormValue.h index c5b590db95f5..b863001e4af8 100644 --- a/lib/DebugInfo/DWARFFormValue.h +++ b/lib/DebugInfo/DWARFFormValue.h @@ -64,6 +64,10 @@ public: uint64_t getUnsigned() const { return Value.uval; } int64_t getSigned() const { return Value.sval; } const char *getAsCString(const DataExtractor *debug_str_data_ptr) const; + const char *getIndirectCString(const DataExtractor *, + const DataExtractor *) const; + uint64_t getIndirectAddress(const DataExtractor *, + const DWARFCompileUnit *) const; bool skipValue(DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFCompileUnit *cu) const; static bool skipValue(uint16_t form, DataExtractor debug_info_data, diff --git a/lib/DebugInfo/DWARFRelocMap.h b/lib/DebugInfo/DWARFRelocMap.h new file mode 100644 index 000000000000..6929e367b84c --- /dev/null +++ b/lib/DebugInfo/DWARFRelocMap.h @@ -0,0 +1,22 @@ +//===-- DWARFRelocMap.h -----------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_DEBUGINFO_DWARFRELOCMAP_H +#define LLVM_DEBUGINFO_DWARFRELOCMAP_H + +#include "llvm/ADT/DenseMap.h" + +namespace llvm { + +typedef DenseMap > RelocAddrMap; + +} // namespace llvm + +#endif // LLVM_DEBUGINFO_DWARFRELOCMAP_H + diff --git a/lib/ExecutionEngine/EventListenerCommon.h b/lib/ExecutionEngine/EventListenerCommon.h index 911d1d68b23a..314db8bd84c2 100644 --- a/lib/ExecutionEngine/EventListenerCommon.h +++ b/lib/ExecutionEngine/EventListenerCommon.h @@ -14,11 +14,11 @@ #ifndef EVENT_LISTENER_COMMON_H #define EVENT_LISTENER_COMMON_H -#include "llvm/DebugInfo.h" -#include "llvm/Metadata.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/ValueHandle.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/Metadata.h" #include "llvm/Support/Path.h" +#include "llvm/Support/ValueHandle.h" namespace llvm { diff --git a/lib/ExecutionEngine/ExecutionEngine.cpp b/lib/ExecutionEngine/ExecutionEngine.cpp index 05987f2b74e7..906a3a3fda7f 100644 --- a/lib/ExecutionEngine/ExecutionEngine.cpp +++ b/lib/ExecutionEngine/ExecutionEngine.cpp @@ -14,22 +14,22 @@ #define DEBUG_TYPE "jit" #include "llvm/ExecutionEngine/ExecutionEngine.h" - -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" -#include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ADT/SmallString.h" #include "llvm/ADT/Statistic.h" +#include "llvm/ExecutionEngine/GenericValue.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Host.h" #include "llvm/Support/MutexGuard.h" +#include "llvm/Support/TargetRegistry.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Support/Host.h" -#include "llvm/Support/TargetRegistry.h" -#include "llvm/DataLayout.h" #include "llvm/Target/TargetMachine.h" #include #include @@ -535,6 +535,8 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { if (isa(C)) { GenericValue Result; switch (C->getType()->getTypeID()) { + default: + break; case Type::IntegerTyID: case Type::X86_FP80TyID: case Type::FP128TyID: @@ -543,7 +545,16 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { // with the correct bit width. Result.IntVal = APInt(C->getType()->getPrimitiveSizeInBits(), 0); break; - default: + case Type::VectorTyID: + // if the whole vector is 'undef' just reserve memory for the value. 
+ const VectorType* VTy = dyn_cast(C->getType()); + const Type *ElemTy = VTy->getElementType(); + unsigned int elemNum = VTy->getNumElements(); + Result.AggregateVal.resize(elemNum); + if (ElemTy->isIntegerTy()) + for (unsigned int i = 0; i < elemNum; ++i) + Result.AggregateVal[i].IntVal = + APInt(ElemTy->getPrimitiveSizeInBits(), 0); break; } return Result; @@ -556,11 +567,11 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Instruction::GetElementPtr: { // Compute the index GenericValue Result = getConstantValue(Op0); - SmallVector Indices(CE->op_begin()+1, CE->op_end()); - uint64_t Offset = TD->getIndexedOffset(Op0->getType(), Indices); + APInt Offset(TD->getPointerSizeInBits(), 0); + cast(CE)->accumulateConstantOffset(*TD, Offset); char* tmp = (char*) Result.PointerVal; - Result = PTOGV(tmp + Offset); + Result = PTOGV(tmp + Offset.getSExtValue()); return Result; } case Instruction::Trunc: { @@ -632,7 +643,7 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { else if (Op0->getType()->isDoubleTy()) GV.IntVal = APIntOps::RoundDoubleToAPInt(GV.DoubleVal, BitWidth); else if (Op0->getType()->isX86_FP80Ty()) { - APFloat apf = APFloat(GV.IntVal); + APFloat apf = APFloat(APFloat::x87DoubleExtended, GV.IntVal); uint64_t v; bool ignored; (void)apf.convertToInteger(&v, BitWidth, @@ -751,27 +762,32 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { case Type::X86_FP80TyID: case Type::PPC_FP128TyID: case Type::FP128TyID: { - APFloat apfLHS = APFloat(LHS.IntVal); + const fltSemantics &Sem = CE->getOperand(0)->getType()->getFltSemantics(); + APFloat apfLHS = APFloat(Sem, LHS.IntVal); switch (CE->getOpcode()) { default: llvm_unreachable("Invalid long double opcode"); case Instruction::FAdd: - apfLHS.add(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.add(APFloat(Sem, RHS.IntVal), APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FSub: - apfLHS.subtract(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.subtract(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FMul: - apfLHS.multiply(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.multiply(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FDiv: - apfLHS.divide(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.divide(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; case Instruction::FRem: - apfLHS.mod(APFloat(RHS.IntVal), APFloat::rmNearestTiesToEven); + apfLHS.mod(APFloat(Sem, RHS.IntVal), + APFloat::rmNearestTiesToEven); GV.IntVal = apfLHS.bitcastToAPInt(); break; } @@ -820,6 +836,101 @@ GenericValue ExecutionEngine::getConstantValue(const Constant *C) { else llvm_unreachable("Unknown constant pointer type!"); break; + case Type::VectorTyID: { + unsigned elemNum; + Type* ElemTy; + const ConstantDataVector *CDV = dyn_cast(C); + const ConstantVector *CV = dyn_cast(C); + const ConstantAggregateZero *CAZ = dyn_cast(C); + + if (CDV) { + elemNum = CDV->getNumElements(); + ElemTy = CDV->getElementType(); + } else if (CV || CAZ) { + VectorType* VTy = dyn_cast(C->getType()); + elemNum = VTy->getNumElements(); + ElemTy = VTy->getElementType(); + } else { + llvm_unreachable("Unknown constant vector type!"); + } + + Result.AggregateVal.resize(elemNum); + // Check if vector holds floats. 
+ if(ElemTy->isFloatTy()) { + if (CAZ) { + GenericValue floatZero; + floatZero.FloatVal = 0.f; + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + floatZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa(CV->getOperand(i))) + Result.AggregateVal[i].FloatVal = cast( + CV->getOperand(i))->getValueAPF().convertToFloat(); + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].FloatVal = CDV->getElementAsFloat(i); + + break; + } + // Check if vector holds doubles. + if (ElemTy->isDoubleTy()) { + if (CAZ) { + GenericValue doubleZero; + doubleZero.DoubleVal = 0.0; + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + doubleZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa(CV->getOperand(i))) + Result.AggregateVal[i].DoubleVal = cast( + CV->getOperand(i))->getValueAPF().convertToDouble(); + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].DoubleVal = CDV->getElementAsDouble(i); + + break; + } + // Check if vector holds integers. + if (ElemTy->isIntegerTy()) { + if (CAZ) { + GenericValue intZero; + intZero.IntVal = APInt(ElemTy->getScalarSizeInBits(), 0ull); + std::fill(Result.AggregateVal.begin(), Result.AggregateVal.end(), + intZero); + break; + } + if(CV) { + for (unsigned i = 0; i < elemNum; ++i) + if (!isa(CV->getOperand(i))) + Result.AggregateVal[i].IntVal = cast( + CV->getOperand(i))->getValue(); + else { + Result.AggregateVal[i].IntVal = + APInt(CV->getOperand(i)->getType()->getPrimitiveSizeInBits(), 0); + } + break; + } + if(CDV) + for (unsigned i = 0; i < elemNum; ++i) + Result.AggregateVal[i].IntVal = APInt( + CDV->getElementType()->getPrimitiveSizeInBits(), + CDV->getElementAsInteger(i)); + + break; + } + llvm_unreachable("Unknown constant pointer type!"); + } + break; + default: SmallString<256> Msg; raw_svector_ostream OS(Msg); @@ -861,6 +972,9 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, const unsigned StoreBytes = getDataLayout()->getTypeStoreSize(Ty); switch (Ty->getTypeID()) { + default: + dbgs() << "Cannot store value of type " << *Ty << "!\n"; + break; case Type::IntegerTyID: StoreIntToMemory(Val.IntVal, (uint8_t*)Ptr, StoreBytes); break; @@ -880,8 +994,19 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, *((PointerTy*)Ptr) = Val.PointerVal; break; - default: - dbgs() << "Cannot store value of type " << *Ty << "!\n"; + case Type::VectorTyID: + for (unsigned i = 0; i < Val.AggregateVal.size(); ++i) { + if (cast(Ty)->getElementType()->isDoubleTy()) + *(((double*)Ptr)+i) = Val.AggregateVal[i].DoubleVal; + if (cast(Ty)->getElementType()->isFloatTy()) + *(((float*)Ptr)+i) = Val.AggregateVal[i].FloatVal; + if (cast(Ty)->getElementType()->isIntegerTy()) { + unsigned numOfBytes =(Val.AggregateVal[i].IntVal.getBitWidth()+7)/8; + StoreIntToMemory(Val.AggregateVal[i].IntVal, + (uint8_t*)Ptr + numOfBytes*i, numOfBytes); + } + } + break; } if (sys::isLittleEndianHost() != getDataLayout()->isLittleEndian()) @@ -893,7 +1018,8 @@ void ExecutionEngine::StoreValueToMemory(const GenericValue &Val, /// from Src into IntVal, which is assumed to be wide enough and to hold zero. 
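The new VectorTyID store path above writes one AggregateVal element per lane, packed contiguously in memory. A hedged sketch of what that looks like for a <4 x float> value; EE and V4FloatTy are assumed to be a live ExecutionEngine* and the corresponding vector Type*, and storeVectorSketch is an illustrative name.

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/GenericValue.h"

    void storeVectorSketch(llvm::ExecutionEngine *EE, llvm::Type *V4FloatTy) {
      llvm::GenericValue GV;
      GV.AggregateVal.resize(4);
      for (unsigned i = 0; i != 4; ++i)
        GV.AggregateVal[i].FloatVal = float(i);      // lanes 0.0 .. 3.0
      float Buffer[4];
      EE->StoreValueToMemory(GV, (llvm::GenericValue *)Buffer, V4FloatTy);
      // Buffer now holds {0.f, 1.f, 2.f, 3.f}, one float per lane as stored above.
    }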
static void LoadIntFromMemory(APInt &IntVal, uint8_t *Src, unsigned LoadBytes) { assert((IntVal.getBitWidth()+7)/8 >= LoadBytes && "Integer too small!"); - uint8_t *Dst = (uint8_t *)IntVal.getRawData(); + uint8_t *Dst = reinterpret_cast( + const_cast(IntVal.getRawData())); if (sys::isLittleEndianHost()) // Little-endian host - the destination must be ordered from LSB to MSB. @@ -945,6 +1071,31 @@ void ExecutionEngine::LoadValueFromMemory(GenericValue &Result, Result.IntVal = APInt(80, y); break; } + case Type::VectorTyID: { + const VectorType *VT = cast(Ty); + const Type *ElemT = VT->getElementType(); + const unsigned numElems = VT->getNumElements(); + if (ElemT->isFloatTy()) { + Result.AggregateVal.resize(numElems); + for (unsigned i = 0; i < numElems; ++i) + Result.AggregateVal[i].FloatVal = *((float*)Ptr+i); + } + if (ElemT->isDoubleTy()) { + Result.AggregateVal.resize(numElems); + for (unsigned i = 0; i < numElems; ++i) + Result.AggregateVal[i].DoubleVal = *((double*)Ptr+i); + } + if (ElemT->isIntegerTy()) { + GenericValue intZero; + const unsigned elemBitWidth = cast(ElemT)->getBitWidth(); + intZero.IntVal = APInt(elemBitWidth, 0); + Result.AggregateVal.resize(numElems, intZero); + for (unsigned i = 0; i < numElems; ++i) + LoadIntFromMemory(Result.AggregateVal[i].IntVal, + (uint8_t*)Ptr+((elemBitWidth+7)/8)*i, (elemBitWidth+7)/8); + } + break; + } default: SmallString<256> Msg; raw_svector_ostream OS(Msg); diff --git a/lib/ExecutionEngine/ExecutionEngineBindings.cpp b/lib/ExecutionEngine/ExecutionEngineBindings.cpp index 1e790e781da0..f4e8246476a5 100644 --- a/lib/ExecutionEngine/ExecutionEngineBindings.cpp +++ b/lib/ExecutionEngine/ExecutionEngineBindings.cpp @@ -13,8 +13,8 @@ #define DEBUG_TYPE "jit" #include "llvm-c/ExecutionEngine.h" -#include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" +#include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/Support/ErrorHandling.h" #include diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp index 4cb0270d576d..7dc295fcbf73 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventListener.cpp @@ -17,11 +17,14 @@ #define DEBUG_TYPE "amplifier-jit-event-listener" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" -#include "llvm/Metadata.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/Metadata.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/MachineFunction.h" +#include "llvm/DebugInfo/DIContext.h" +#include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Support/Errno.h" @@ -41,6 +44,11 @@ class IntelJITEventListener : public JITEventListener { MethodIDMap MethodIDs; FilenameCache Filenames; + typedef SmallVector MethodAddressVector; + typedef DenseMap ObjectMap; + + ObjectMap LoadedObjectMap; + public: IntelJITEventListener(IntelJITEventsWrapper* libraryWrapper) { Wrapper.reset(libraryWrapper); @@ -72,6 +80,17 @@ static LineNumberInfo LineStartToIntelJITFormat( return Result; } +static LineNumberInfo DILineInfoToIntelJITFormat(uintptr_t StartAddress, + uintptr_t Address, + DILineInfo Line) { + LineNumberInfo Result; + + Result.Offset = Address - StartAddress; + Result.LineNumber = Line.getLine(); + + return Result; +} + static iJIT_Method_Load FunctionDescToIntelJITFormat( 
IntelJITEventsWrapper& Wrapper, const char* FnName, @@ -169,9 +188,101 @@ void IntelJITEventListener::NotifyFreeingMachineCode(void *FnStart) { } void IntelJITEventListener::NotifyObjectEmitted(const ObjectImage &Obj) { + // Get the address of the object image for use as a unique identifier + const void* ObjData = Obj.getData().data(); + DIContext* Context = DIContext::getDWARFContext(Obj.getObjectFile()); + MethodAddressVector Functions; + + // Use symbol info to iterate functions in the object. + error_code ec; + for (object::symbol_iterator I = Obj.begin_symbols(), + E = Obj.end_symbols(); + I != E && !ec; + I.increment(ec)) { + std::vector LineInfo; + std::string SourceFileName; + + object::SymbolRef::Type SymType; + if (I->getType(SymType)) continue; + if (SymType == object::SymbolRef::ST_Function) { + StringRef Name; + uint64_t Addr; + uint64_t Size; + if (I->getName(Name)) continue; + if (I->getAddress(Addr)) continue; + if (I->getSize(Size)) continue; + + // Record this address in a local vector + Functions.push_back((void*)Addr); + + // Build the function loaded notification message + iJIT_Method_Load FunctionMessage = FunctionDescToIntelJITFormat(*Wrapper, + Name.data(), + Addr, + Size); + if (Context) { + DILineInfoTable Lines = Context->getLineInfoForAddressRange(Addr, Size); + DILineInfoTable::iterator Begin = Lines.begin(); + DILineInfoTable::iterator End = Lines.end(); + for (DILineInfoTable::iterator It = Begin; It != End; ++It) { + LineInfo.push_back(DILineInfoToIntelJITFormat((uintptr_t)Addr, + It->first, + It->second)); + } + if (LineInfo.size() == 0) { + FunctionMessage.source_file_name = 0; + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } else { + SourceFileName = Lines.front().second.getFileName(); + FunctionMessage.source_file_name = (char *)SourceFileName.c_str(); + FunctionMessage.line_number_size = LineInfo.size(); + FunctionMessage.line_number_table = &*LineInfo.begin(); + } + } else { + FunctionMessage.source_file_name = 0; + FunctionMessage.line_number_size = 0; + FunctionMessage.line_number_table = 0; + } + + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_LOAD_FINISHED, + &FunctionMessage); + MethodIDs[(void*)Addr] = FunctionMessage.method_id; + } + } + + // To support object unload notification, we need to keep a list of + // registered function addresses for each loaded object. We will + // use the MethodIDs map to get the registered ID for each function. + LoadedObjectMap[ObjData] = Functions; } void IntelJITEventListener::NotifyFreeingObject(const ObjectImage &Obj) { + // Get the address of the object image for use as a unique identifier + const void* ObjData = Obj.getData().data(); + + // Get the object's function list from LoadedObjectMap + ObjectMap::iterator OI = LoadedObjectMap.find(ObjData); + if (OI == LoadedObjectMap.end()) + return; + MethodAddressVector& Functions = OI->second; + + // Walk the function list, unregistering each function + for (MethodAddressVector::iterator FI = Functions.begin(), + FE = Functions.end(); + FI != FE; + ++FI) { + void* FnStart = const_cast(*FI); + MethodIDMap::iterator MI = MethodIDs.find(FnStart); + if (MI != MethodIDs.end()) { + Wrapper->iJIT_NotifyEvent(iJVM_EVENT_TYPE_METHOD_UNLOAD_START, + &MI->second); + MethodIDs.erase(MI); + } + } + + // Erase the object from LoadedObjectMap + LoadedObjectMap.erase(OI); } } // anonymous namespace. 
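A minimal sketch of wiring the listener above into an engine so the object emit/free notifications fire. It assumes LLVM was configured with Intel JIT events support, in which case the createIntelJITEventListener factory returns a usable listener and otherwise returns null; attachAmplifierListener and EE are illustrative names.

    #include "llvm/ExecutionEngine/ExecutionEngine.h"
    #include "llvm/ExecutionEngine/JITEventListener.h"

    void attachAmplifierListener(llvm::ExecutionEngine *EE) {
      llvm::JITEventListener *Listener =
          llvm::JITEventListener::createIntelJITEventListener();
      if (Listener)
        EE->RegisterJITEventListener(Listener); // NotifyObjectEmitted/FreeingObject fire
    }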
diff --git a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h index 7ab08e15a8b3..3d9ff5351610 100644 --- a/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h +++ b/lib/ExecutionEngine/IntelJITEvents/IntelJITEventsWrapper.h @@ -35,8 +35,6 @@ class IntelJITEventsWrapper { NotifyEventPtr NotifyEventFunc; RegisterCallbackExPtr RegisterCallbackExFunc; IsProfilingActivePtr IsProfilingActiveFunc; - FinalizeThreadPtr FinalizeThreadFunc; - FinalizeProcessPtr FinalizeProcessFunc; GetNewMethodIDPtr GetNewMethodIDFunc; public: @@ -48,8 +46,6 @@ public: : NotifyEventFunc(::iJIT_NotifyEvent), RegisterCallbackExFunc(::iJIT_RegisterCallbackEx), IsProfilingActiveFunc(::iJIT_IsProfilingActive), - FinalizeThreadFunc(::FinalizeThread), - FinalizeProcessFunc(::FinalizeProcess), GetNewMethodIDFunc(::iJIT_GetNewMethodID) { } @@ -62,8 +58,6 @@ public: : NotifyEventFunc(NotifyEventImpl), RegisterCallbackExFunc(RegisterCallbackExImpl), IsProfilingActiveFunc(IsProfilingActiveImpl), - FinalizeThreadFunc(FinalizeThreadImpl), - FinalizeProcessFunc(FinalizeProcessImpl), GetNewMethodIDFunc(GetNewMethodIDImpl) { } diff --git a/lib/ExecutionEngine/Interpreter/Execution.cpp b/lib/ExecutionEngine/Interpreter/Execution.cpp index 5202b091654e..526c04e082d2 100644 --- a/lib/ExecutionEngine/Interpreter/Execution.cpp +++ b/lib/ExecutionEngine/Interpreter/Execution.cpp @@ -13,16 +13,16 @@ #define DEBUG_TYPE "interpreter" #include "Interpreter.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Instructions.h" -#include "llvm/CodeGen/IntrinsicLowering.h" -#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/IntrinsicLowering.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/GetElementPtrTypeIterator.h" #include "llvm/Support/MathExtras.h" #include #include @@ -1169,10 +1169,12 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { .VarArgs[VAList.UIntPairVal.second]; Type *Ty = I.getType(); switch (Ty->getTypeID()) { - case Type::IntegerTyID: Dest.IntVal = Src.IntVal; - IMPLEMENT_VAARG(Pointer); - IMPLEMENT_VAARG(Float); - IMPLEMENT_VAARG(Double); + case Type::IntegerTyID: + Dest.IntVal = Src.IntVal; + break; + IMPLEMENT_VAARG(Pointer); + IMPLEMENT_VAARG(Float); + IMPLEMENT_VAARG(Double); default: dbgs() << "Unhandled dest type for vaarg instruction: " << *Ty << "\n"; llvm_unreachable(0); @@ -1185,6 +1187,39 @@ void Interpreter::visitVAArgInst(VAArgInst &I) { ++VAList.UIntPairVal.second; } +void Interpreter::visitExtractElementInst(ExtractElementInst &I) { + ExecutionContext &SF = ECStack.back(); + GenericValue Src1 = getOperandValue(I.getOperand(0), SF); + GenericValue Src2 = getOperandValue(I.getOperand(1), SF); + GenericValue Dest; + + Type *Ty = I.getType(); + const unsigned indx = unsigned(Src2.IntVal.getZExtValue()); + + if(Src1.AggregateVal.size() > indx) { + switch (Ty->getTypeID()) { + default: + dbgs() << "Unhandled destination type for extractelement instruction: " + << *Ty << "\n"; + llvm_unreachable(0); + break; + case Type::IntegerTyID: + Dest.IntVal = Src1.AggregateVal[indx].IntVal; + break; + case Type::FloatTyID: + Dest.FloatVal = Src1.AggregateVal[indx].FloatVal; + break; + case Type::DoubleTyID: + Dest.DoubleVal = 
Src1.AggregateVal[indx].DoubleVal; + break; + } + } else { + dbgs() << "Invalid index in extractelement instruction\n"; + } + + SetValue(&I, Dest, SF); +} + GenericValue Interpreter::getConstantExprValue (ConstantExpr *CE, ExecutionContext &SF) { switch (CE->getOpcode()) { diff --git a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp index e16e2d112a99..bef4bbf66023 100644 --- a/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp +++ b/lib/ExecutionEngine/Interpreter/ExternalFunctions.cpp @@ -20,19 +20,19 @@ //===----------------------------------------------------------------------===// #include "Interpreter.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" #include "llvm/Config/config.h" // Detect libffi -#include "llvm/Support/ErrorHandling.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/DynamicLibrary.h" -#include "llvm/DataLayout.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/Mutex.h" +#include #include #include -#include -#include #include +#include #ifdef HAVE_FFI_CALL #ifdef HAVE_FFI_H diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.cpp b/lib/ExecutionEngine/Interpreter/Interpreter.cpp index 55152dbbea11..9ee9d9456d1d 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.cpp +++ b/lib/ExecutionEngine/Interpreter/Interpreter.cpp @@ -15,8 +15,8 @@ #include "Interpreter.h" #include "llvm/CodeGen/IntrinsicLowering.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include using namespace llvm; diff --git a/lib/ExecutionEngine/Interpreter/Interpreter.h b/lib/ExecutionEngine/Interpreter/Interpreter.h index 72c42c15db30..2952d7eabe2b 100644 --- a/lib/ExecutionEngine/Interpreter/Interpreter.h +++ b/lib/ExecutionEngine/Interpreter/Interpreter.h @@ -14,14 +14,14 @@ #ifndef LLI_INTERPRETER_H #define LLI_INTERPRETER_H -#include "llvm/Function.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/GenericValue.h" -#include "llvm/DataLayout.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" +#include "llvm/InstVisitor.h" #include "llvm/Support/CallSite.h" #include "llvm/Support/DataTypes.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/InstVisitor.h" #include "llvm/Support/raw_ostream.h" namespace llvm { @@ -178,6 +178,7 @@ public: void visitAShr(BinaryOperator &I); void visitVAArgInst(VAArgInst &I); + void visitExtractElementInst(ExtractElementInst &I); void visitInstruction(Instruction &I) { errs() << I << "\n"; llvm_unreachable("Instruction not interpretable yet!"); diff --git a/lib/ExecutionEngine/JIT/JIT.cpp b/lib/ExecutionEngine/JIT/JIT.cpp index 1ad338203a2b..53ea0a260087 100644 --- a/lib/ExecutionEngine/JIT/JIT.cpp +++ b/lib/ExecutionEngine/JIT/JIT.cpp @@ -13,26 +13,26 @@ //===----------------------------------------------------------------------===// #include "JIT.h" -#include "llvm/Constants.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" -#include "llvm/GlobalVariable.h" -#include "llvm/Instructions.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineCodeInfo.h" +#include "llvm/Config/config.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include 
"llvm/DataLayout.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetJITInfo.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" +#include "llvm/IR/GlobalVariable.h" +#include "llvm/IR/Instructions.h" #include "llvm/Support/Dwarf.h" +#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" #include "llvm/Support/MutexGuard.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Config/config.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -522,7 +522,8 @@ GenericValue JIT::runFunction(Function *F, case Type::PPC_FP128TyID: case Type::X86_FP80TyID: case Type::FP128TyID: - C = ConstantFP::get(F->getContext(), APFloat(AV.IntVal)); + C = ConstantFP::get(F->getContext(), APFloat(ArgTy->getFltSemantics(), + AV.IntVal)); break; case Type::PointerTyID: void *ArgPtr = GVTOP(AV); diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp index 19c197903a63..35d2b8b1e9f2 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.cpp @@ -12,21 +12,21 @@ // //===----------------------------------------------------------------------===// -#include "JIT.h" #include "JITDwarfEmitter.h" -#include "llvm/Function.h" +#include "JIT.h" #include "llvm/ADT/DenseMap.h" #include "llvm/CodeGen/JITCodeEmitter.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/MC/MachineLocation.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" #include "llvm/MC/MCSymbol.h" +#include "llvm/MC/MachineLocation.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetFrameLowering.h" +#include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetMachine.h" #include "llvm/Target/TargetRegisterInfo.h" using namespace llvm; diff --git a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h index 9cdbeac86ace..98ac34049176 100644 --- a/lib/ExecutionEngine/JIT/JITDwarfEmitter.h +++ b/lib/ExecutionEngine/JIT/JITDwarfEmitter.h @@ -15,9 +15,13 @@ #ifndef LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H #define LLVM_EXECUTION_ENGINE_JIT_DWARFEMITTER_H +#include "llvm/Support/DataTypes.h" +#include + namespace llvm { class Function; +class JIT; class JITCodeEmitter; class MachineFunction; class MachineModuleInfo; diff --git a/lib/ExecutionEngine/JIT/JITEmitter.cpp b/lib/ExecutionEngine/JIT/JITEmitter.cpp index ecafda7286f6..c27387699ab6 100644 --- a/lib/ExecutionEngine/JIT/JITEmitter.cpp +++ b/lib/ExecutionEngine/JIT/JITEmitter.cpp @@ -15,39 +15,39 @@ #define DEBUG_TYPE "jit" #include "JIT.h" #include "JITDwarfEmitter.h" +#include "llvm/ADT/DenseMap.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/Constants.h" -#include "llvm/DebugInfo.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Module.h" +#include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallVector.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/ADT/ValueMap.h" #include "llvm/CodeGen/JITCodeEmitter.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineCodeInfo.h" #include "llvm/CodeGen/MachineConstantPool.h" +#include "llvm/CodeGen/MachineFunction.h" #include 
"llvm/CodeGen/MachineJumpTableInfo.h" #include "llvm/CodeGen/MachineModuleInfo.h" #include "llvm/CodeGen/MachineRelocation.h" +#include "llvm/DebugInfo.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" -#include "llvm/DataLayout.h" -#include "llvm/Target/TargetInstrInfo.h" -#include "llvm/Target/TargetJITInfo.h" -#include "llvm/Target/TargetMachine.h" -#include "llvm/Target/TargetOptions.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/Disassembler.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/ManagedStatic.h" +#include "llvm/Support/Memory.h" #include "llvm/Support/MutexGuard.h" #include "llvm/Support/ValueHandle.h" #include "llvm/Support/raw_ostream.h" -#include "llvm/Support/Disassembler.h" -#include "llvm/Support/Memory.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/SmallPtrSet.h" -#include "llvm/ADT/SmallVector.h" -#include "llvm/ADT/Statistic.h" -#include "llvm/ADT/ValueMap.h" +#include "llvm/Target/TargetInstrInfo.h" +#include "llvm/Target/TargetJITInfo.h" +#include "llvm/Target/TargetMachine.h" +#include "llvm/Target/TargetOptions.h" #include #ifndef NDEBUG #include @@ -969,14 +969,24 @@ bool JITEmitter::finishFunction(MachineFunction &F) { SavedBufferBegin = BufferBegin; SavedBufferEnd = BufferEnd; SavedCurBufferPtr = CurBufferPtr; - - BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), - ActualSize); - BufferEnd = BufferBegin+ActualSize; - EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; - uint8_t *EhStart; - uint8_t *FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, - EhStart); + uint8_t *FrameRegister; + + while (true) { + BufferBegin = CurBufferPtr = MemMgr->startExceptionTable(F.getFunction(), + ActualSize); + BufferEnd = BufferBegin+ActualSize; + EmittedFunctions[F.getFunction()].ExceptionTable = BufferBegin; + uint8_t *EhStart; + FrameRegister = DE->EmitDwarfTable(F, *this, FnStart, FnEnd, EhStart); + + // If the buffer was large enough to hold the table then we are done. + if (CurBufferPtr != BufferEnd) + break; + + // Try again with twice as much space. 
+ ActualSize = (CurBufferPtr - BufferBegin) * 2; + MemMgr->deallocateExceptionTable(BufferBegin); + } MemMgr->endExceptionTable(F.getFunction(), BufferBegin, CurBufferPtr, FrameRegister); BufferBegin = SavedBufferBegin; diff --git a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp index 61bc119d305b..66aeb772ddc3 100644 --- a/lib/ExecutionEngine/JIT/JITMemoryManager.cpp +++ b/lib/ExecutionEngine/JIT/JITMemoryManager.cpp @@ -16,20 +16,19 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/Statistic.h" #include "llvm/ADT/Twine.h" -#include "llvm/GlobalValue.h" +#include "llvm/Config/config.h" +#include "llvm/IR/GlobalValue.h" #include "llvm/Support/Allocator.h" #include "llvm/Support/Compiler.h" #include "llvm/Support/Debug.h" +#include "llvm/Support/DynamicLibrary.h" #include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/raw_ostream.h" #include "llvm/Support/Memory.h" -#include "llvm/Support/ErrorHandling.h" -#include "llvm/Support/DynamicLibrary.h" -#include "llvm/Config/config.h" -#include +#include "llvm/Support/raw_ostream.h" #include #include #include +#include #if defined(__linux__) #if defined(HAVE_SYS_STAT_H) @@ -73,15 +72,20 @@ namespace { /// getBlockAfter - Return the memory block immediately after this one. /// MemoryRangeHeader &getBlockAfter() const { - return *(MemoryRangeHeader*)((char*)this+BlockSize); + return *reinterpret_cast( + reinterpret_cast( + const_cast(this))+BlockSize); } /// getFreeBlockBefore - If the block before this one is free, return it, /// otherwise return null. FreeRangeHeader *getFreeBlockBefore() const { if (PrevAllocated) return 0; - intptr_t PrevSize = ((intptr_t *)this)[-1]; - return (FreeRangeHeader*)((char*)this-PrevSize); + intptr_t PrevSize = reinterpret_cast( + const_cast(this))[-1]; + return reinterpret_cast( + reinterpret_cast( + const_cast(this))-PrevSize); } /// FreeBlock - Turn an allocated block into a free block, adjusting @@ -501,10 +505,14 @@ namespace { /// allocateDataSection - Allocate memory for a data section. uint8_t *allocateDataSection(uintptr_t Size, unsigned Alignment, - unsigned SectionID) { + unsigned SectionID, bool IsReadOnly) { return (uint8_t*)DataAllocator.Allocate(Size, Alignment); } + bool applyPermissions(std::string *ErrMsg) { + return false; + } + /// startExceptionTable - Use startFunctionBody to allocate memory for the /// function's exception table. 
uint8_t* startExceptionTable(const Function* F, uintptr_t &ActualSize) { diff --git a/lib/ExecutionEngine/MCJIT/CMakeLists.txt b/lib/ExecutionEngine/MCJIT/CMakeLists.txt index 2911a5077220..088635a0e999 100644 --- a/lib/ExecutionEngine/MCJIT/CMakeLists.txt +++ b/lib/ExecutionEngine/MCJIT/CMakeLists.txt @@ -1,3 +1,4 @@ add_llvm_library(LLVMMCJIT MCJIT.cpp + SectionMemoryManager.cpp ) diff --git a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt index 90f4d2f75e24..900460bf1cb4 100644 --- a/lib/ExecutionEngine/MCJIT/LLVMBuild.txt +++ b/lib/ExecutionEngine/MCJIT/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = MCJIT parent = ExecutionEngine -required_libraries = Core ExecutionEngine RuntimeDyld Support Target +required_libraries = Core ExecutionEngine RuntimeDyld Support Target JIT diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.cpp b/lib/ExecutionEngine/MCJIT/MCJIT.cpp index 752c5b73ea32..fee10e194355 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.cpp +++ b/lib/ExecutionEngine/MCJIT/MCJIT.cpp @@ -8,20 +8,20 @@ //===----------------------------------------------------------------------===// #include "MCJIT.h" -#include "llvm/DerivedTypes.h" -#include "llvm/Function.h" #include "llvm/ExecutionEngine/GenericValue.h" #include "llvm/ExecutionEngine/JITEventListener.h" #include "llvm/ExecutionEngine/JITMemoryManager.h" #include "llvm/ExecutionEngine/MCJIT.h" #include "llvm/ExecutionEngine/ObjectBuffer.h" #include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/IR/DataLayout.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/Function.h" #include "llvm/MC/MCAsmInfo.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/MemoryBuffer.h" #include "llvm/Support/MutexGuard.h" -#include "llvm/DataLayout.h" using namespace llvm; @@ -118,17 +118,26 @@ void MCJIT::emitObject(Module *m) { // FIXME: Add a parameter to identify which object is being finalized when // MCJIT supports multiple modules. +// FIXME: Provide a way to separate code emission, relocations and page +// protection in the interface. void MCJIT::finalizeObject() { // If the module hasn't been compiled, just do that. if (!isCompiled) { // If the call to Dyld.resolveRelocations() is removed from emitObject() // we'll need to do that here. emitObject(M); + + // Set page permissions. + MemMgr->applyPermissions(); + return; } // Resolve any relocations. Dyld.resolveRelocations(); + + // Set page permissions. 
+ MemMgr->applyPermissions(); } void *MCJIT::getPointerToBasicBlock(BasicBlock *BB) { diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index 571080d2bd22..283a8e528118 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -10,10 +10,10 @@ #ifndef LLVM_LIB_EXECUTIONENGINE_MCJIT_H #define LLVM_LIB_EXECUTIONENGINE_MCJIT_H -#include "llvm/PassManager.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ExecutionEngine/ExecutionEngine.h" #include "llvm/ExecutionEngine/RuntimeDyld.h" +#include "llvm/PassManager.h" namespace llvm { diff --git a/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp new file mode 100644 index 000000000000..fa35acd389ae --- /dev/null +++ b/lib/ExecutionEngine/MCJIT/SectionMemoryManager.cpp @@ -0,0 +1,226 @@ +//===- SectionMemoryManager.cpp - Memory manager for MCJIT/RtDyld *- C++ -*-==// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements the section-based memory manager used by the MCJIT +// execution engine and RuntimeDyld +// +//===----------------------------------------------------------------------===// + +#include "llvm/Config/config.h" +#include "llvm/ExecutionEngine/SectionMemoryManager.h" +#include "llvm/Support/DynamicLibrary.h" +#include "llvm/Support/MathExtras.h" + +#ifdef __linux__ + // These includes used by SectionMemoryManager::getPointerToNamedFunction() + // for Glibc trickery. See comments in this function for more information. + #ifdef HAVE_SYS_STAT_H + #include + #endif + #include + #include +#endif + +namespace llvm { + +uint8_t *SectionMemoryManager::allocateDataSection(uintptr_t Size, + unsigned Alignment, + unsigned SectionID, + bool IsReadOnly) { + if (IsReadOnly) + return allocateSection(RODataMem, Size, Alignment); + return allocateSection(RWDataMem, Size, Alignment); +} + +uint8_t *SectionMemoryManager::allocateCodeSection(uintptr_t Size, + unsigned Alignment, + unsigned SectionID) { + return allocateSection(CodeMem, Size, Alignment); +} + +uint8_t *SectionMemoryManager::allocateSection(MemoryGroup &MemGroup, + uintptr_t Size, + unsigned Alignment) { + if (!Alignment) + Alignment = 16; + + assert(!(Alignment & (Alignment - 1)) && "Alignment must be a power of two."); + + uintptr_t RequiredSize = Alignment * ((Size + Alignment - 1)/Alignment + 1); + uintptr_t Addr = 0; + + // Look in the list of free memory regions and use a block there if one + // is available. + for (int i = 0, e = MemGroup.FreeMem.size(); i != e; ++i) { + sys::MemoryBlock &MB = MemGroup.FreeMem[i]; + if (MB.size() >= RequiredSize) { + Addr = (uintptr_t)MB.base(); + uintptr_t EndOfBlock = Addr + MB.size(); + // Align the address. + Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); + // Store cutted free memory block. + MemGroup.FreeMem[i] = sys::MemoryBlock((void*)(Addr + Size), + EndOfBlock - Addr - Size); + return (uint8_t*)Addr; + } + } + + // No pre-allocated free block was large enough. Allocate a new memory region. + // Note that all sections get allocated as read-write. The permissions will + // be updated later based on memory group. + // + // FIXME: It would be useful to define a default allocation size (or add + // it as a constructor parameter) to minimize the number of allocations. 
+ // + // FIXME: Initialize the Near member for each memory group to avoid + // interleaving. + error_code ec; + sys::MemoryBlock MB = sys::Memory::allocateMappedMemory(RequiredSize, + &MemGroup.Near, + sys::Memory::MF_READ | + sys::Memory::MF_WRITE, + ec); + if (ec) { + // FIXME: Add error propogation to the interface. + return NULL; + } + + // Save this address as the basis for our next request + MemGroup.Near = MB; + + MemGroup.AllocatedMem.push_back(MB); + Addr = (uintptr_t)MB.base(); + uintptr_t EndOfBlock = Addr + MB.size(); + + // Align the address. + Addr = (Addr + Alignment - 1) & ~(uintptr_t)(Alignment - 1); + + // The allocateMappedMemory may allocate much more memory than we need. In + // this case, we store the unused memory as a free memory block. + unsigned FreeSize = EndOfBlock-Addr-Size; + if (FreeSize > 16) + MemGroup.FreeMem.push_back(sys::MemoryBlock((void*)(Addr + Size), FreeSize)); + + // Return aligned address + return (uint8_t*)Addr; +} + +bool SectionMemoryManager::applyPermissions(std::string *ErrMsg) +{ + // FIXME: Should in-progress permissions be reverted if an error occurs? + error_code ec; + + // Make code memory executable. + ec = applyMemoryGroupPermissions(CodeMem, + sys::Memory::MF_READ | sys::Memory::MF_EXEC); + if (ec) { + if (ErrMsg) { + *ErrMsg = ec.message(); + } + return true; + } + + // Make read-only data memory read-only. + ec = applyMemoryGroupPermissions(RODataMem, + sys::Memory::MF_READ | sys::Memory::MF_EXEC); + if (ec) { + if (ErrMsg) { + *ErrMsg = ec.message(); + } + return true; + } + + // Read-write data memory already has the correct permissions + + return false; +} + +error_code SectionMemoryManager::applyMemoryGroupPermissions(MemoryGroup &MemGroup, + unsigned Permissions) { + + for (int i = 0, e = MemGroup.AllocatedMem.size(); i != e; ++i) { + error_code ec; + ec = sys::Memory::protectMappedMemory(MemGroup.AllocatedMem[i], + Permissions); + if (ec) { + return ec; + } + } + + return error_code::success(); +} + +void SectionMemoryManager::invalidateInstructionCache() { + for (int i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) + sys::Memory::InvalidateInstructionCache(CodeMem.AllocatedMem[i].base(), + CodeMem.AllocatedMem[i].size()); +} + +static int jit_noop() { + return 0; +} + +void *SectionMemoryManager::getPointerToNamedFunction(const std::string &Name, + bool AbortOnFailure) { +#if defined(__linux__) + //===--------------------------------------------------------------------===// + // Function stubs that are invoked instead of certain library calls + // + // Force the following functions to be linked in to anything that uses the + // JIT. This is a hack designed to work around the all-too-clever Glibc + // strategy of making these functions work differently when inlined vs. when + // not inlined, and hiding their real definitions in a separate archive file + // that the dynamic linker can't see. For more info, search for + // 'libc_nonshared.a' on Google, or read http://llvm.org/PR274. + if (Name == "stat") return (void*)(intptr_t)&stat; + if (Name == "fstat") return (void*)(intptr_t)&fstat; + if (Name == "lstat") return (void*)(intptr_t)&lstat; + if (Name == "stat64") return (void*)(intptr_t)&stat64; + if (Name == "fstat64") return (void*)(intptr_t)&fstat64; + if (Name == "lstat64") return (void*)(intptr_t)&lstat64; + if (Name == "atexit") return (void*)(intptr_t)&atexit; + if (Name == "mknod") return (void*)(intptr_t)&mknod; +#endif // __linux__ + + // We should not invoke parent's ctors/dtors from generated main()! 
+ // On Mingw and Cygwin, the symbol __main is resolved to + // callee's(eg. tools/lli) one, to invoke wrong duplicated ctors + // (and register wrong callee's dtors with atexit(3)). + // We expect ExecutionEngine::runStaticConstructorsDestructors() + // is called before ExecutionEngine::runFunctionAsMain() is called. + if (Name == "__main") return (void*)(intptr_t)&jit_noop; + + const char *NameStr = Name.c_str(); + void *Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr); + if (Ptr) return Ptr; + + // If it wasn't found and if it starts with an underscore ('_') character, + // try again without the underscore. + if (NameStr[0] == '_') { + Ptr = sys::DynamicLibrary::SearchForAddressOfSymbol(NameStr+1); + if (Ptr) return Ptr; + } + + if (AbortOnFailure) + report_fatal_error("Program used external function '" + Name + + "' which could not be resolved!"); + return 0; +} + +SectionMemoryManager::~SectionMemoryManager() { + for (unsigned i = 0, e = CodeMem.AllocatedMem.size(); i != e; ++i) + sys::Memory::releaseMappedMemory(CodeMem.AllocatedMem[i]); + for (unsigned i = 0, e = RWDataMem.AllocatedMem.size(); i != e; ++i) + sys::Memory::releaseMappedMemory(RWDataMem.AllocatedMem[i]); + for (unsigned i = 0, e = RODataMem.AllocatedMem.size(); i != e; ++i) + sys::Memory::releaseMappedMemory(RODataMem.AllocatedMem[i]); +} + +} // namespace llvm + diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp index 6b8e9d1954b0..38867ecca591 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileJITEventListener.cpp @@ -17,7 +17,7 @@ #define DEBUG_TYPE "oprofile-jit-event-listener" #include "llvm/DebugInfo.h" -#include "llvm/Function.h" +#include "llvm/IR/Function.h" #include "llvm/ADT/OwningPtr.h" #include "llvm/CodeGen/MachineFunction.h" #include "llvm/ExecutionEngine/OProfileWrapper.h" diff --git a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp index d67f5370b862..7c0d39518595 100644 --- a/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp +++ b/lib/ExecutionEngine/OProfileJIT/OProfileWrapper.cpp @@ -29,6 +29,7 @@ #include #include #include +#include namespace { diff --git a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp index 50cd0724ea4f..603c526d06e3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/GDBRegistrar.cpp @@ -9,10 +9,10 @@ #include "JITRegistrar.h" #include "llvm/ADT/DenseMap.h" -#include "llvm/Support/MutexGuard.h" -#include "llvm/Support/Mutex.h" -#include "llvm/Support/ErrorHandling.h" #include "llvm/Support/Compiler.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Mutex.h" +#include "llvm/Support/MutexGuard.h" using namespace llvm; @@ -44,7 +44,7 @@ extern "C" { // We put information about the JITed function in this global, which the // debugger reads. Make sure to specify the version statically, because the // debugger checks the version before we can set it during runtime. - static struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; + struct jit_descriptor __jit_debug_descriptor = { 1, 0, 0, 0 }; // Debuggers puts a breakpoint in this function. 
LLVM_ATTRIBUTE_NOINLINE void __jit_debug_register_code() { } diff --git a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h index 17f3a2146492..89350cc5b621 100644 --- a/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h +++ b/lib/ExecutionEngine/RuntimeDyld/ObjectImageCommon.h @@ -1,76 +1,78 @@ -//===-- ObjectImageCommon.h - Format independent executuable object image -===// -// -// The LLVM Compiler Infrastructure -// -// This file is distributed under the University of Illinois Open Source -// License. See LICENSE.TXT for details. -// -//===----------------------------------------------------------------------===// -// -// This file declares a file format independent ObjectImage class. -// -//===----------------------------------------------------------------------===// - -#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H -#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H - -#include "llvm/Object/ObjectFile.h" -#include "llvm/ExecutionEngine/ObjectImage.h" -#include "llvm/ExecutionEngine/ObjectBuffer.h" - -namespace llvm { - -class ObjectImageCommon : public ObjectImage { - ObjectImageCommon(); // = delete - ObjectImageCommon(const ObjectImageCommon &other); // = delete - -protected: - object::ObjectFile *ObjFile; - - // This form of the constructor allows subclasses to use - // format-specific subclasses of ObjectFile directly - ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj) - : ObjectImage(Input), // saves Input as Buffer and takes ownership - ObjFile(Obj) - { - } - -public: - ObjectImageCommon(ObjectBuffer* Input) - : ObjectImage(Input) // saves Input as Buffer and takes ownership - { - ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer()); - } - virtual ~ObjectImageCommon() { delete ObjFile; } - - virtual object::symbol_iterator begin_symbols() const - { return ObjFile->begin_symbols(); } - virtual object::symbol_iterator end_symbols() const - { return ObjFile->end_symbols(); } - - virtual object::section_iterator begin_sections() const - { return ObjFile->begin_sections(); } - virtual object::section_iterator end_sections() const - { return ObjFile->end_sections(); } - - virtual /* Triple::ArchType */ unsigned getArch() const - { return ObjFile->getArch(); } - - virtual StringRef getData() const { return ObjFile->getData(); } - - // Subclasses can override these methods to update the image with loaded - // addresses for sections and common symbols - virtual void updateSectionAddress(const object::SectionRef &Sec, - uint64_t Addr) {} - virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr) - {} - - // Subclasses can override these methods to provide JIT debugging support - virtual void registerWithDebugger() {} - virtual void deregisterWithDebugger() {} -}; - -} // end namespace llvm - -#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H - +//===-- ObjectImageCommon.h - Format independent executuable object image -===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file declares a file format independent ObjectImage class. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H +#define LLVM_RUNTIMEDYLD_OBJECTIMAGECOMMON_H + +#include "llvm/ExecutionEngine/ObjectBuffer.h" +#include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/Object/ObjectFile.h" + +namespace llvm { + +class ObjectImageCommon : public ObjectImage { + ObjectImageCommon(); // = delete + ObjectImageCommon(const ObjectImageCommon &other); // = delete + +protected: + object::ObjectFile *ObjFile; + + // This form of the constructor allows subclasses to use + // format-specific subclasses of ObjectFile directly + ObjectImageCommon(ObjectBuffer *Input, object::ObjectFile *Obj) + : ObjectImage(Input), // saves Input as Buffer and takes ownership + ObjFile(Obj) + { + } + +public: + ObjectImageCommon(ObjectBuffer* Input) + : ObjectImage(Input) // saves Input as Buffer and takes ownership + { + ObjFile = object::ObjectFile::createObjectFile(Buffer->getMemBuffer()); + } + virtual ~ObjectImageCommon() { delete ObjFile; } + + virtual object::symbol_iterator begin_symbols() const + { return ObjFile->begin_symbols(); } + virtual object::symbol_iterator end_symbols() const + { return ObjFile->end_symbols(); } + + virtual object::section_iterator begin_sections() const + { return ObjFile->begin_sections(); } + virtual object::section_iterator end_sections() const + { return ObjFile->end_sections(); } + + virtual /* Triple::ArchType */ unsigned getArch() const + { return ObjFile->getArch(); } + + virtual StringRef getData() const { return ObjFile->getData(); } + + virtual object::ObjectFile* getObjectFile() const { return ObjFile; } + + // Subclasses can override these methods to update the image with loaded + // addresses for sections and common symbols + virtual void updateSectionAddress(const object::SectionRef &Sec, + uint64_t Addr) {} + virtual void updateSymbolAddress(const object::SymbolRef &Sym, uint64_t Addr) + {} + + // Subclasses can override these methods to provide JIT debugging support + virtual void registerWithDebugger() {} + virtual void deregisterWithDebugger() {} +}; + +} // end namespace llvm + +#endif // LLVM_RUNTIMEDYLD_OBJECT_IMAGE_H + diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index a180e36e83f8..409b25fef3af 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -12,12 +12,13 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dyld" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "ObjectImageCommon.h" -#include "RuntimeDyldImpl.h" #include "RuntimeDyldELF.h" +#include "RuntimeDyldImpl.h" #include "RuntimeDyldMachO.h" -#include "llvm/Support/Path.h" #include "llvm/Support/MathExtras.h" +#include "llvm/Support/Path.h" using namespace llvm; using namespace llvm::object; @@ -106,28 +107,24 @@ ObjectImage *RuntimeDyldImpl::loadObject(ObjectBuffer *InputBuffer) { SymType == object::SymbolRef::ST_Unknown) { uint64_t FileOffset; StringRef SectionData; + bool IsCode; section_iterator si = obj->end_sections(); Check(i->getFileOffset(FileOffset)); Check(i->getSection(si)); if (si == obj->end_sections()) continue; Check(si->getContents(SectionData)); + Check(si->isText(IsCode)); const uint8_t* SymPtr = (const uint8_t*)InputBuffer->getBufferStart() + (uintptr_t)FileOffset; uintptr_t SectOffset = (uintptr_t)(SymPtr - (const uint8_t*)SectionData.begin()); - unsigned SectionID = - 
findOrEmitSection(*obj, - *si, - SymType == object::SymbolRef::ST_Function, - LocalSections); + unsigned SectionID = findOrEmitSection(*obj, *si, IsCode, LocalSections); LocalSymbols[Name.data()] = SymbolLoc(SectionID, SectOffset); DEBUG(dbgs() << "\tFileOffset: " << format("%p", (uintptr_t)FileOffset) << " flags: " << flags << " SID: " << SectionID << " Offset: " << format("%p", SectOffset)); - bool isGlobal = flags & SymbolRef::SF_Global; - if (isGlobal) - GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset); + GlobalSymbolTable[Name] = SymbolLoc(SectionID, SectOffset); } } DEBUG(dbgs() << "\tType: " << SymType << " Name: " << Name << "\n"); @@ -182,7 +179,7 @@ void RuntimeDyldImpl::emitCommonSymbols(ObjectImage &Obj, // Allocate memory for the section unsigned SectionID = Sections.size(); uint8_t *Addr = MemMgr->allocateDataSection(TotalSize, sizeof(void*), - SectionID); + SectionID, false); if (!Addr) report_fatal_error("Unable to allocate memory for common symbols!"); uint64_t Offset = 0; @@ -237,11 +234,13 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, bool IsRequired; bool IsVirtual; bool IsZeroInit; + bool IsReadOnly; uint64_t DataSize; StringRef Name; Check(Section.isRequiredForExecution(IsRequired)); Check(Section.isVirtual(IsVirtual)); Check(Section.isZeroInit(IsZeroInit)); + Check(Section.isReadOnlyData(IsReadOnly)); Check(Section.getSize(DataSize)); Check(Section.getName(Name)); @@ -256,7 +255,7 @@ unsigned RuntimeDyldImpl::emitSection(ObjectImage &Obj, Allocate = DataSize + StubBufSize; Addr = IsCode ? MemMgr->allocateCodeSection(Allocate, Alignment, SectionID) - : MemMgr->allocateDataSection(Allocate, Alignment, SectionID); + : MemMgr->allocateDataSection(Allocate, Alignment, SectionID, IsReadOnly); if (!Addr) report_fatal_error("Unable to allocate section memory!"); @@ -433,14 +432,20 @@ void RuntimeDyldImpl::resolveExternalSymbols() { RelocationList &Relocs = i->second; SymbolTableMap::const_iterator Loc = GlobalSymbolTable.find(Name); if (Loc == GlobalSymbolTable.end()) { - // This is an external symbol, try to get it address from - // MemoryManager. - uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), + if (Name.size() == 0) { + // This is an absolute symbol, use an address of zero. + DEBUG(dbgs() << "Resolving absolute relocations." << "\n"); + resolveRelocationList(Relocs, 0); + } else { + // This is an external symbol, try to get its address from + // MemoryManager. + uint8_t *Addr = (uint8_t*) MemMgr->getPointerToNamedFunction(Name.data(), true); - DEBUG(dbgs() << "Resolving relocations Name: " << Name - << "\t" << format("%p", Addr) - << "\n"); - resolveRelocationList(Relocs, (uintptr_t)Addr); + DEBUG(dbgs() << "Resolving relocations Name: " << Name + << "\t" << format("%p", Addr) + << "\n"); + resolveRelocationList(Relocs, (uintptr_t)Addr); + } } else { report_fatal_error("Expected external symbol"); } @@ -451,6 +456,12 @@ void RuntimeDyldImpl::resolveExternalSymbols() { //===----------------------------------------------------------------------===// // RuntimeDyld class implementation RuntimeDyld::RuntimeDyld(RTDyldMemoryManager *mm) { + // FIXME: There's a potential issue lurking here if a single instance of + // RuntimeDyld is used to load multiple objects. The current implementation + // associates a single memory manager with a RuntimeDyld instance. Even + // though the public class spawns a new 'impl' instance for each load, + // they share a single memory manager. 
This can become a problem when page + // permissions are applied. Dyld = 0; MM = mm; } diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp index f7015cdf6b5e..b8537b1f2f9c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldELF.cpp @@ -15,16 +15,16 @@ #include "RuntimeDyldELF.h" #include "JITRegistrar.h" #include "ObjectImageCommon.h" +#include "llvm/ADT/IntervalMap.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/STLExtras.h" -#include "llvm/ADT/IntervalMap.h" -#include "llvm/Object/ObjectFile.h" -#include "llvm/ExecutionEngine/ObjectImage.h" -#include "llvm/ExecutionEngine/ObjectBuffer.h" -#include "llvm/Support/ELF.h" +#include "llvm/ADT/StringRef.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/ObjectBuffer.h" +#include "llvm/ExecutionEngine/ObjectImage.h" #include "llvm/Object/ELF.h" +#include "llvm/Object/ObjectFile.h" +#include "llvm/Support/ELF.h" using namespace llvm; using namespace llvm::object; @@ -38,19 +38,22 @@ error_code check(error_code Err) { return Err; } -template -class DyldELFObject : public ELFObjectFile { - LLVM_ELF_IMPORT_TYPES(target_endianness, is64Bits) +template +class DyldELFObject + : public ELFObjectFile { + LLVM_ELF_IMPORT_TYPES(ELFT) - typedef Elf_Shdr_Impl Elf_Shdr; - typedef Elf_Sym_Impl Elf_Sym; - typedef Elf_Rel_Impl Elf_Rel; - typedef Elf_Rel_Impl Elf_Rela; + typedef Elf_Shdr_Impl Elf_Shdr; + typedef Elf_Sym_Impl Elf_Sym; + typedef + Elf_Rel_Impl Elf_Rel; + typedef + Elf_Rel_Impl Elf_Rela; - typedef Elf_Ehdr_Impl Elf_Ehdr; + typedef Elf_Ehdr_Impl Elf_Ehdr; typedef typename ELFDataTypeTypedefHelper< - target_endianness, is64Bits>::value_type addr_type; + ELFT>::value_type addr_type; public: DyldELFObject(MemoryBuffer *Wrapper, error_code &ec); @@ -60,24 +63,25 @@ public: // Methods for type inquiry through isa, cast and dyn_cast static inline bool classof(const Binary *v) { - return (isa >(v) - && classof(cast >(v))); + return (isa >(v) + && classof(cast >(v))); } static inline bool classof( - const ELFObjectFile *v) { + const ELFObjectFile *v) { return v->isDyldType(); } }; -template +template class ELFObjectImage : public ObjectImageCommon { protected: - DyldELFObject *DyldObj; + DyldELFObject *DyldObj; bool Registered; public: ELFObjectImage(ObjectBuffer *Input, - DyldELFObject *Obj) + DyldELFObject *Obj) : ObjectImageCommon(Input, Obj), DyldObj(Obj), Registered(false) {} @@ -113,17 +117,15 @@ class ELFObjectImage : public ObjectImageCommon { // The MemoryBuffer passed into this constructor is just a wrapper around the // actual memory. Ultimately, the Binary parent class will take ownership of // this MemoryBuffer object but not the underlying memory. 
-template -DyldELFObject::DyldELFObject(MemoryBuffer *Wrapper, - error_code &ec) - : ELFObjectFile(Wrapper, ec) { +template +DyldELFObject::DyldELFObject(MemoryBuffer *Wrapper, error_code &ec) + : ELFObjectFile(Wrapper, ec) { this->isDyldELFObject = true; } -template -void DyldELFObject::updateSectionAddress( - const SectionRef &Sec, - uint64_t Addr) { +template +void DyldELFObject::updateSectionAddress(const SectionRef &Sec, + uint64_t Addr) { DataRefImpl ShdrRef = Sec.getRawDataRefImpl(); Elf_Shdr *shdr = const_cast( reinterpret_cast(ShdrRef.p)); @@ -133,14 +135,12 @@ void DyldELFObject::updateSectionAddress( shdr->sh_addr = static_cast(Addr); } -template -void DyldELFObject::updateSymbolAddress( - const SymbolRef &SymRef, - uint64_t Addr) { +template +void DyldELFObject::updateSymbolAddress(const SymbolRef &SymRef, + uint64_t Addr) { Elf_Sym *sym = const_cast( - ELFObjectFile:: - getSymbol(SymRef.getRawDataRefImpl())); + ELFObjectFile::getSymbol(SymRef.getRawDataRefImpl())); // This assumes the address passed in matches the target address bitness // The template-based type cast handles everything else. @@ -149,7 +149,6 @@ void DyldELFObject::updateSymbolAddress( } // namespace - namespace llvm { ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { @@ -161,24 +160,28 @@ ObjectImage *RuntimeDyldELF::createObjectImage(ObjectBuffer *Buffer) { error_code ec; if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2LSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else if (Ident.first == ELF::ELFCLASS32 && Ident.second == ELF::ELFDATA2MSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2MSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else if (Ident.first == ELF::ELFCLASS64 && Ident.second == ELF::ELFDATA2LSB) { - DyldELFObject *Obj = - new DyldELFObject(Buffer->getMemBuffer(), ec); - return new ELFObjectImage(Buffer, Obj); + DyldELFObject > *Obj = + new DyldELFObject >( + Buffer->getMemBuffer(), ec); + return new ELFObjectImage >(Buffer, Obj); } else llvm_unreachable("Unexpected ELF format"); @@ -207,7 +210,7 @@ void RuntimeDyldELF::resolveX86_64Relocation(const SectionEntry &Section, case ELF::R_X86_64_32S: { Value += Addend; assert((Type == ELF::R_X86_64_32 && (Value <= UINT32_MAX)) || - (Type == ELF::R_X86_64_32S && + (Type == ELF::R_X86_64_32S && ((int64_t)Value <= INT32_MAX && (int64_t)Value >= INT32_MIN))); uint32_t TruncatedAddr = (Value & 0xFFFFFFFF); uint32_t *Target = reinterpret_cast(Section.Address + Offset); @@ -288,8 +291,9 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, default: llvm_unreachable("Not implemented relocation type!"); - // Write a 32bit value to relocation address, taking into account the + // Write a 32bit value to relocation address, taking into account the // implicit addend encoded in the target. 
+ case ELF::R_ARM_TARGET1 : case ELF::R_ARM_ABS32 : *TargetPtr += Value; break; @@ -298,7 +302,7 @@ void RuntimeDyldELF::resolveARMRelocation(const SectionEntry &Section, // Last 4 bit should be shifted. case ELF::R_ARM_MOVW_ABS_NC : // We are not expecting any other addend in the relocation address. - // Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2 + // Using 0x000F0FFF because MOVW has its 16 bit immediate split into 2 // non-contiguous fields. assert((*TargetPtr & 0x000F0FFF) == 0); Value = Value & 0xFFFF; @@ -516,6 +520,12 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, uint8_t aalk = *(LocalAddress+3); writeInt16BE(LocalAddress + 2, (aalk & 3) | ((Value + Addend) & 0xfffc)); } break; + case ELF::R_PPC64_ADDR32 : { + int32_t Result = static_cast(Value + Addend); + if (SignExtend32<32>(Result) != Result) + llvm_unreachable("Relocation R_PPC64_ADDR32 overflow"); + writeInt32BE(LocalAddress, Result); + } break; case ELF::R_PPC64_REL24 : { uint64_t FinalAddress = (Section.LoadAddress + Offset); int32_t delta = static_cast(Value - FinalAddress + Addend); @@ -524,6 +534,13 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, // Generates a 'bl
' instruction writeInt32BE(LocalAddress, 0x48000001 | (delta & 0x03FFFFFC)); } break; + case ELF::R_PPC64_REL32 : { + uint64_t FinalAddress = (Section.LoadAddress + Offset); + int32_t delta = static_cast(Value - FinalAddress + Addend); + if (SignExtend32<32>(delta) != delta) + llvm_unreachable("Relocation R_PPC64_REL32 overflow"); + writeInt32BE(LocalAddress, delta); + } break; case ELF::R_PPC64_ADDR64 : writeInt64BE(LocalAddress, Value + Addend); break; @@ -543,7 +560,6 @@ void RuntimeDyldELF::resolvePPC64Relocation(const SectionEntry &Section, } } - void RuntimeDyldELF::resolveRelocation(const SectionEntry &Section, uint64_t Offset, uint64_t Value, @@ -623,9 +639,9 @@ void RuntimeDyldELF::processRelocationRef(const ObjRelocationInfo &Rel, // Default to 'true' in case isText fails (though it never does). bool isCode = true; si->isText(isCode); - Value.SectionID = findOrEmitSection(Obj, - (*si), - isCode, + Value.SectionID = findOrEmitSection(Obj, + (*si), + isCode, ObjSectionToID); Value.Addend = Addend; break; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index a292ee1a8479..f1009945775c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -14,12 +14,12 @@ #ifndef LLVM_RUNTIME_DYLD_IMPL_H #define LLVM_RUNTIME_DYLD_IMPL_H -#include "llvm/ExecutionEngine/RuntimeDyld.h" -#include "llvm/ExecutionEngine/ObjectImage.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringMap.h" #include "llvm/ADT/Triple.h" +#include "llvm/ExecutionEngine/ObjectImage.h" +#include "llvm/ExecutionEngine/RuntimeDyld.h" #include "llvm/Object/ObjectFile.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp index 987c0c3afc26..bcc3df1b4e7c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.cpp @@ -12,10 +12,10 @@ //===----------------------------------------------------------------------===// #define DEBUG_TYPE "dyld" +#include "RuntimeDyldMachO.h" #include "llvm/ADT/OwningPtr.h" -#include "llvm/ADT/StringRef.h" #include "llvm/ADT/STLExtras.h" -#include "RuntimeDyldMachO.h" +#include "llvm/ADT/StringRef.h" using namespace llvm; using namespace llvm::object; @@ -96,6 +96,7 @@ bool RuntimeDyldMachO::resolveI386Relocation(uint8_t *LocalAddress, *p++ = (uint8_t)(ValueToWrite & 0xff); ValueToWrite >>= 8; } + return false; } case macho::RIT_Difference: case macho::RIT_Generic_LocalDifference: diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h index fe3539dff6f5..62d84870780c 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldMachO.h @@ -14,10 +14,10 @@ #ifndef LLVM_RUNTIME_DYLD_MACHO_H #define LLVM_RUNTIME_DYLD_MACHO_H +#include "RuntimeDyldImpl.h" #include "llvm/ADT/IndexedMap.h" #include "llvm/Object/MachOObject.h" #include "llvm/Support/Format.h" -#include "RuntimeDyldImpl.h" using namespace llvm; using namespace llvm::object; diff --git a/lib/ExecutionEngine/TargetSelect.cpp b/lib/ExecutionEngine/TargetSelect.cpp index 8b6104fdca9c..ca4330fa22b0 100644 --- a/lib/ExecutionEngine/TargetSelect.cpp +++ b/lib/ExecutionEngine/TargetSelect.cpp @@ -15,13 +15,13 @@ 
//===----------------------------------------------------------------------===// #include "llvm/ExecutionEngine/ExecutionEngine.h" -#include "llvm/Module.h" #include "llvm/ADT/Triple.h" +#include "llvm/IR/Module.h" #include "llvm/MC/SubtargetFeature.h" -#include "llvm/Target/TargetMachine.h" #include "llvm/Support/CommandLine.h" #include "llvm/Support/Host.h" #include "llvm/Support/TargetRegistry.h" +#include "llvm/Target/TargetMachine.h" using namespace llvm; @@ -32,8 +32,7 @@ TargetMachine *EngineBuilder::selectTarget() { // must use the host architecture. if (UseMCJIT && WhichEngine != EngineKind::Interpreter && M) TT.setTriple(M->getTargetTriple()); - else - TT.setTriple(LLVM_HOSTTRIPLE); + return selectTarget(TT, MArch, MCPU, MAttrs); } @@ -45,7 +44,7 @@ TargetMachine *EngineBuilder::selectTarget(const Triple &TargetTriple, const SmallVectorImpl& MAttrs) { Triple TheTriple(TargetTriple); if (TheTriple.getTriple().empty()) - TheTriple.setTriple(sys::getDefaultTargetTriple()); + TheTriple.setTriple(sys::getProcessTriple()); // Adjust the triple to match what the user requested. const Target *TheTarget = 0; diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp new file mode 100644 index 000000000000..fb591a891dae --- /dev/null +++ b/lib/IR/AsmWriter.cpp @@ -0,0 +1,2236 @@ +//===-- AsmWriter.cpp - Printing LLVM as an assembly file -----------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This library implements the functionality defined in llvm/Assembly/Writer.h +// +// Note that these routines must be extremely tolerant of various errors in the +// LLVM code, because it can be used for debugging transformations. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Assembly/Writer.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/StringExtras.h" +#include "llvm/Assembly/AssemblyAnnotationWriter.h" +#include "llvm/Assembly/PrintModulePass.h" +#include "llvm/DebugInfo.h" +#include "llvm/IR/CallingConv.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DerivedTypes.h" +#include "llvm/IR/InlineAsm.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/LLVMContext.h" +#include "llvm/IR/Module.h" +#include "llvm/IR/Operator.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/IR/ValueSymbolTable.h" +#include "llvm/Support/CFG.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/Dwarf.h" +#include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/FormattedStream.h" +#include "llvm/Support/MathExtras.h" +#include +#include +using namespace llvm; + +// Make virtual table appear in this compilation unit. +AssemblyAnnotationWriter::~AssemblyAnnotationWriter() {} + +//===----------------------------------------------------------------------===// +// Helper Functions +//===----------------------------------------------------------------------===// + +static const Module *getModuleFromVal(const Value *V) { + if (const Argument *MA = dyn_cast(V)) + return MA->getParent() ? MA->getParent()->getParent() : 0; + + if (const BasicBlock *BB = dyn_cast(V)) + return BB->getParent() ? BB->getParent()->getParent() : 0; + + if (const Instruction *I = dyn_cast(V)) { + const Function *M = I->getParent() ? I->getParent()->getParent() : 0; + return M ? 
M->getParent() : 0; + } + + if (const GlobalValue *GV = dyn_cast(V)) + return GV->getParent(); + return 0; +} + +static void PrintCallingConv(unsigned cc, raw_ostream &Out) { + switch (cc) { + default: Out << "cc" << cc; break; + case CallingConv::Fast: Out << "fastcc"; break; + case CallingConv::Cold: Out << "coldcc"; break; + case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; + case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; + case CallingConv::X86_ThisCall: Out << "x86_thiscallcc"; break; + case CallingConv::Intel_OCL_BI: Out << "intel_ocl_bicc"; break; + case CallingConv::ARM_APCS: Out << "arm_apcscc"; break; + case CallingConv::ARM_AAPCS: Out << "arm_aapcscc"; break; + case CallingConv::ARM_AAPCS_VFP: Out << "arm_aapcs_vfpcc"; break; + case CallingConv::MSP430_INTR: Out << "msp430_intrcc"; break; + case CallingConv::PTX_Kernel: Out << "ptx_kernel"; break; + case CallingConv::PTX_Device: Out << "ptx_device"; break; + } +} + +// PrintEscapedString - Print each character of the specified string, escaping +// it if it is not printable or if it is an escape char. +static void PrintEscapedString(StringRef Name, raw_ostream &Out) { + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + unsigned char C = Name[i]; + if (isprint(C) && C != '\\' && C != '"') + Out << C; + else + Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); + } +} + +enum PrefixType { + GlobalPrefix, + LabelPrefix, + LocalPrefix, + NoPrefix +}; + +/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either +/// prefixed with % (if the string only contains simple characters) or is +/// surrounded with ""'s (if it has special chars in it). Print it out. +static void PrintLLVMName(raw_ostream &OS, StringRef Name, PrefixType Prefix) { + assert(!Name.empty() && "Cannot get empty name!"); + switch (Prefix) { + case NoPrefix: break; + case GlobalPrefix: OS << '@'; break; + case LabelPrefix: break; + case LocalPrefix: OS << '%'; break; + } + + // Scan the name to see if it needs quotes first. + bool NeedsQuotes = isdigit(static_cast(Name[0])); + if (!NeedsQuotes) { + for (unsigned i = 0, e = Name.size(); i != e; ++i) { + // By making this unsigned, the value passed in to isalnum will always be + // in the range 0-255. This is important when building with MSVC because + // its implementation will assert. This situation can arise when dealing + // with UTF-8 multibyte characters. + unsigned char C = Name[i]; + if (!isalnum(static_cast(C)) && C != '-' && C != '.' && + C != '_') { + NeedsQuotes = true; + break; + } + } + } + + // If we didn't need any quotes, just write out the name in one blast. + if (!NeedsQuotes) { + OS << Name; + return; + } + + // Okay, we need quotes. Output the quotes and escape any scary characters as + // needed. + OS << '"'; + PrintEscapedString(Name, OS); + OS << '"'; +} + +/// PrintLLVMName - Turn the specified name into an 'LLVM name', which is either +/// prefixed with % (if the string only contains simple characters) or is +/// surrounded with ""'s (if it has special chars in it). Print it out. +static void PrintLLVMName(raw_ostream &OS, const Value *V) { + PrintLLVMName(OS, V->getName(), + isa(V) ? GlobalPrefix : LocalPrefix); +} + +//===----------------------------------------------------------------------===// +// TypePrinting Class: Type printing machinery +//===----------------------------------------------------------------------===// + +/// TypePrinting - Type printing machinery. 
+namespace { +class TypePrinting { + TypePrinting(const TypePrinting &) LLVM_DELETED_FUNCTION; + void operator=(const TypePrinting&) LLVM_DELETED_FUNCTION; +public: + + /// NamedTypes - The named types that are used by the current module. + TypeFinder NamedTypes; + + /// NumberedTypes - The numbered types, along with their value. + DenseMap NumberedTypes; + + + TypePrinting() {} + ~TypePrinting() {} + + void incorporateTypes(const Module &M); + + void print(Type *Ty, raw_ostream &OS); + + void printStructBody(StructType *Ty, raw_ostream &OS); +}; +} // end anonymous namespace. + + +void TypePrinting::incorporateTypes(const Module &M) { + NamedTypes.run(M, false); + + // The list of struct types we got back includes all the struct types, split + // the unnamed ones out to a numbering and remove the anonymous structs. + unsigned NextNumber = 0; + + std::vector::iterator NextToUse = NamedTypes.begin(), I, E; + for (I = NamedTypes.begin(), E = NamedTypes.end(); I != E; ++I) { + StructType *STy = *I; + + // Ignore anonymous types. + if (STy->isLiteral()) + continue; + + if (STy->getName().empty()) + NumberedTypes[STy] = NextNumber++; + else + *NextToUse++ = STy; + } + + NamedTypes.erase(NextToUse, NamedTypes.end()); +} + + +/// CalcTypeName - Write the specified type to the specified raw_ostream, making +/// use of type names or up references to shorten the type name where possible. +void TypePrinting::print(Type *Ty, raw_ostream &OS) { + switch (Ty->getTypeID()) { + case Type::VoidTyID: OS << "void"; break; + case Type::HalfTyID: OS << "half"; break; + case Type::FloatTyID: OS << "float"; break; + case Type::DoubleTyID: OS << "double"; break; + case Type::X86_FP80TyID: OS << "x86_fp80"; break; + case Type::FP128TyID: OS << "fp128"; break; + case Type::PPC_FP128TyID: OS << "ppc_fp128"; break; + case Type::LabelTyID: OS << "label"; break; + case Type::MetadataTyID: OS << "metadata"; break; + case Type::X86_MMXTyID: OS << "x86_mmx"; break; + case Type::IntegerTyID: + OS << 'i' << cast(Ty)->getBitWidth(); + return; + + case Type::FunctionTyID: { + FunctionType *FTy = cast(Ty); + print(FTy->getReturnType(), OS); + OS << " ("; + for (FunctionType::param_iterator I = FTy->param_begin(), + E = FTy->param_end(); I != E; ++I) { + if (I != FTy->param_begin()) + OS << ", "; + print(*I, OS); + } + if (FTy->isVarArg()) { + if (FTy->getNumParams()) OS << ", "; + OS << "..."; + } + OS << ')'; + return; + } + case Type::StructTyID: { + StructType *STy = cast(Ty); + + if (STy->isLiteral()) + return printStructBody(STy, OS); + + if (!STy->getName().empty()) + return PrintLLVMName(OS, STy->getName(), LocalPrefix); + + DenseMap::iterator I = NumberedTypes.find(STy); + if (I != NumberedTypes.end()) + OS << '%' << I->second; + else // Not enumerated, print the hex address. 
+ OS << "%\"type " << STy << '\"'; + return; + } + case Type::PointerTyID: { + PointerType *PTy = cast(Ty); + print(PTy->getElementType(), OS); + if (unsigned AddressSpace = PTy->getAddressSpace()) + OS << " addrspace(" << AddressSpace << ')'; + OS << '*'; + return; + } + case Type::ArrayTyID: { + ArrayType *ATy = cast(Ty); + OS << '[' << ATy->getNumElements() << " x "; + print(ATy->getElementType(), OS); + OS << ']'; + return; + } + case Type::VectorTyID: { + VectorType *PTy = cast(Ty); + OS << "<" << PTy->getNumElements() << " x "; + print(PTy->getElementType(), OS); + OS << '>'; + return; + } + default: + OS << ""; + return; + } +} + +void TypePrinting::printStructBody(StructType *STy, raw_ostream &OS) { + if (STy->isOpaque()) { + OS << "opaque"; + return; + } + + if (STy->isPacked()) + OS << '<'; + + if (STy->getNumElements() == 0) { + OS << "{}"; + } else { + StructType::element_iterator I = STy->element_begin(); + OS << "{ "; + print(*I++, OS); + for (StructType::element_iterator E = STy->element_end(); I != E; ++I) { + OS << ", "; + print(*I, OS); + } + + OS << " }"; + } + if (STy->isPacked()) + OS << '>'; +} + + + +//===----------------------------------------------------------------------===// +// SlotTracker Class: Enumerate slot numbers for unnamed values +//===----------------------------------------------------------------------===// + +namespace { + +/// This class provides computation of slot numbers for LLVM Assembly writing. +/// +class SlotTracker { +public: + /// ValueMap - A mapping of Values to slot numbers. + typedef DenseMap ValueMap; + +private: + /// TheModule - The module for which we are holding slot numbers. + const Module* TheModule; + + /// TheFunction - The function for which we are holding slot numbers. + const Function* TheFunction; + bool FunctionProcessed; + + /// mMap - The slot map for the module level data. + ValueMap mMap; + unsigned mNext; + + /// fMap - The slot map for the function level data. + ValueMap fMap; + unsigned fNext; + + /// mdnMap - Map for MDNodes. + DenseMap mdnMap; + unsigned mdnNext; + + /// asMap - The slot map for attribute sets. + DenseMap asMap; + unsigned asNext; +public: + /// Construct from a module + explicit SlotTracker(const Module *M); + /// Construct from a function, starting out in incorp state. + explicit SlotTracker(const Function *F); + + /// Return the slot number of the specified value in it's type + /// plane. If something is not in the SlotTracker, return -1. + int getLocalSlot(const Value *V); + int getGlobalSlot(const GlobalValue *V); + int getMetadataSlot(const MDNode *N); + int getAttributeGroupSlot(AttributeSet AS); + + /// If you'd like to deal with a function instead of just a module, use + /// this method to get its data into the SlotTracker. + void incorporateFunction(const Function *F) { + TheFunction = F; + FunctionProcessed = false; + } + + /// After calling incorporateFunction, use this method to remove the + /// most recently incorporated function from the SlotTracker. This + /// will reset the state of the machine back to just the module contents. + void purgeFunction(); + + /// MDNode map iterators. + typedef DenseMap::iterator mdn_iterator; + mdn_iterator mdn_begin() { return mdnMap.begin(); } + mdn_iterator mdn_end() { return mdnMap.end(); } + unsigned mdn_size() const { return mdnMap.size(); } + bool mdn_empty() const { return mdnMap.empty(); } + + /// AttributeSet map iterators. 
+ typedef DenseMap::iterator as_iterator; + as_iterator as_begin() { return asMap.begin(); } + as_iterator as_end() { return asMap.end(); } + unsigned as_size() const { return asMap.size(); } + bool as_empty() const { return asMap.empty(); } + + /// This function does the actual initialization. + inline void initialize(); + + // Implementation Details +private: + /// CreateModuleSlot - Insert the specified GlobalValue* into the slot table. + void CreateModuleSlot(const GlobalValue *V); + + /// CreateMetadataSlot - Insert the specified MDNode* into the slot table. + void CreateMetadataSlot(const MDNode *N); + + /// CreateFunctionSlot - Insert the specified Value* into the slot table. + void CreateFunctionSlot(const Value *V); + + /// \brief Insert the specified AttributeSet into the slot table. + void CreateAttributeSetSlot(AttributeSet AS); + + /// Add all of the module level global variables (and their initializers) + /// and function declarations, but not the contents of those functions. + void processModule(); + + /// Add all of the functions arguments, basic blocks, and instructions. + void processFunction(); + + SlotTracker(const SlotTracker &) LLVM_DELETED_FUNCTION; + void operator=(const SlotTracker &) LLVM_DELETED_FUNCTION; +}; + +} // end anonymous namespace + + +static SlotTracker *createSlotTracker(const Value *V) { + if (const Argument *FA = dyn_cast(V)) + return new SlotTracker(FA->getParent()); + + if (const Instruction *I = dyn_cast(V)) + if (I->getParent()) + return new SlotTracker(I->getParent()->getParent()); + + if (const BasicBlock *BB = dyn_cast(V)) + return new SlotTracker(BB->getParent()); + + if (const GlobalVariable *GV = dyn_cast(V)) + return new SlotTracker(GV->getParent()); + + if (const GlobalAlias *GA = dyn_cast(V)) + return new SlotTracker(GA->getParent()); + + if (const Function *Func = dyn_cast(V)) + return new SlotTracker(Func); + + if (const MDNode *MD = dyn_cast(V)) { + if (!MD->isFunctionLocal()) + return new SlotTracker(MD->getFunction()); + + return new SlotTracker((Function *)0); + } + + return 0; +} + +#if 0 +#define ST_DEBUG(X) dbgs() << X +#else +#define ST_DEBUG(X) +#endif + +// Module level constructor. Causes the contents of the Module (sans functions) +// to be added to the slot table. +SlotTracker::SlotTracker(const Module *M) + : TheModule(M), TheFunction(0), FunctionProcessed(false), + mNext(0), fNext(0), mdnNext(0), asNext(0) { +} + +// Function level constructor. Causes the contents of the Module and the one +// function provided to be added to the slot table. +SlotTracker::SlotTracker(const Function *F) + : TheModule(F ? F->getParent() : 0), TheFunction(F), FunctionProcessed(false), + mNext(0), fNext(0), mdnNext(0), asNext(0) { +} + +inline void SlotTracker::initialize() { + if (TheModule) { + processModule(); + TheModule = 0; ///< Prevent re-processing next time we're called. + } + + if (TheFunction && !FunctionProcessed) + processFunction(); +} + +// Iterate through all the global variables, functions, and global +// variable initializers and create slots for them. +void SlotTracker::processModule() { + ST_DEBUG("begin processModule!\n"); + + // Add all of the unnamed global variables to the value table. + for (Module::const_global_iterator I = TheModule->global_begin(), + E = TheModule->global_end(); I != E; ++I) { + if (!I->hasName()) + CreateModuleSlot(I); + } + + // Add metadata used by named metadata. 
+  for (Module::const_named_metadata_iterator
+       I = TheModule->named_metadata_begin(),
+       E = TheModule->named_metadata_end(); I != E; ++I) {
+    const NamedMDNode *NMD = I;
+    for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i)
+      CreateMetadataSlot(NMD->getOperand(i));
+  }
+
+  for (Module::const_iterator I = TheModule->begin(), E = TheModule->end();
+       I != E; ++I) {
+    if (!I->hasName())
+      // Add all the unnamed functions to the table.
+      CreateModuleSlot(I);
+
+    // Add all the function attributes to the table.
+    // FIXME: Add attributes of other objects?
+    AttributeSet FnAttrs = I->getAttributes().getFnAttributes();
+    if (FnAttrs.hasAttributes(AttributeSet::FunctionIndex))
+      CreateAttributeSetSlot(FnAttrs);
+  }
+
+  ST_DEBUG("end processModule!\n");
+}
+
+// Process the arguments, basic blocks, and instructions of a function.
+void SlotTracker::processFunction() {
+  ST_DEBUG("begin processFunction!\n");
+  fNext = 0;
+
+  // Add all the function arguments with no names.
+  for(Function::const_arg_iterator AI = TheFunction->arg_begin(),
+      AE = TheFunction->arg_end(); AI != AE; ++AI)
+    if (!AI->hasName())
+      CreateFunctionSlot(AI);
+
+  ST_DEBUG("Inserting Instructions:\n");
+
+  SmallVector<std::pair<unsigned, MDNode*>, 4> MDForInst;
+
+  // Add all of the basic blocks and instructions with no names.
+  for (Function::const_iterator BB = TheFunction->begin(),
+       E = TheFunction->end(); BB != E; ++BB) {
+    if (!BB->hasName())
+      CreateFunctionSlot(BB);
+
+    for (BasicBlock::const_iterator I = BB->begin(), E = BB->end(); I != E;
+         ++I) {
+      if (!I->getType()->isVoidTy() && !I->hasName())
+        CreateFunctionSlot(I);
+
+      // Intrinsics can directly use metadata.  We allow direct calls to any
+      // llvm.foo function here, because the target may not be linked into the
+      // optimizer.
+      if (const CallInst *CI = dyn_cast<CallInst>(I)) {
+        if (Function *F = CI->getCalledFunction())
+          if (F->getName().startswith("llvm."))
+            for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i)
+              if (MDNode *N = dyn_cast_or_null<MDNode>(I->getOperand(i)))
+                CreateMetadataSlot(N);
+
+        // Add all the call attributes to the table.
+        AttributeSet Attrs = CI->getAttributes().getFnAttributes();
+        if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+          CreateAttributeSetSlot(Attrs);
+      } else if (const InvokeInst *II = dyn_cast<InvokeInst>(I)) {
+        // Add all the call attributes to the table.
+        AttributeSet Attrs = II->getAttributes().getFnAttributes();
+        if (Attrs.hasAttributes(AttributeSet::FunctionIndex))
+          CreateAttributeSetSlot(Attrs);
+      }
+
+      // Process metadata attached with this instruction.
+      I->getAllMetadata(MDForInst);
+      for (unsigned i = 0, e = MDForInst.size(); i != e; ++i)
+        CreateMetadataSlot(MDForInst[i].second);
+      MDForInst.clear();
+    }
+  }
+
+  FunctionProcessed = true;
+
+  ST_DEBUG("end processFunction!\n");
+}
+
+/// Clean up after incorporating a function. This is the only way to get out of
+/// the function incorporation state that affects get*Slot/Create*Slot. Function
+/// incorporation state is indicated by TheFunction != 0.
+void SlotTracker::purgeFunction() {
+  ST_DEBUG("begin purgeFunction!\n");
+  fMap.clear(); // Simply discard the function level map
+  TheFunction = 0;
+  FunctionProcessed = false;
+  ST_DEBUG("end purgeFunction!\n");
+}
+
+/// getGlobalSlot - Get the slot number of a global value.
+int SlotTracker::getGlobalSlot(const GlobalValue *V) {
+  // Check for uninitialized state and do lazy initialization.
+  initialize();
+
+  // Find the value in the module map
+  ValueMap::iterator MI = mMap.find(V);
+  return MI == mMap.end() ?
-1 : (int)MI->second; +} + +/// getMetadataSlot - Get the slot number of a MDNode. +int SlotTracker::getMetadataSlot(const MDNode *N) { + // Check for uninitialized state and do lazy initialization. + initialize(); + + // Find the MDNode in the module map + mdn_iterator MI = mdnMap.find(N); + return MI == mdnMap.end() ? -1 : (int)MI->second; +} + + +/// getLocalSlot - Get the slot number for a value that is local to a function. +int SlotTracker::getLocalSlot(const Value *V) { + assert(!isa(V) && "Can't get a constant or global slot with this!"); + + // Check for uninitialized state and do lazy initialization. + initialize(); + + ValueMap::iterator FI = fMap.find(V); + return FI == fMap.end() ? -1 : (int)FI->second; +} + +int SlotTracker::getAttributeGroupSlot(AttributeSet AS) { + // Check for uninitialized state and do lazy initialization. + initialize(); + + // Find the AttributeSet in the module map. + as_iterator AI = asMap.find(AS); + return AI == asMap.end() ? -1 : (int)AI->second; +} + +/// CreateModuleSlot - Insert the specified GlobalValue* into the slot table. +void SlotTracker::CreateModuleSlot(const GlobalValue *V) { + assert(V && "Can't insert a null Value into SlotTracker!"); + assert(!V->getType()->isVoidTy() && "Doesn't need a slot!"); + assert(!V->hasName() && "Doesn't need a slot!"); + + unsigned DestSlot = mNext++; + mMap[V] = DestSlot; + + ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" << + DestSlot << " ["); + // G = Global, F = Function, A = Alias, o = other + ST_DEBUG((isa(V) ? 'G' : + (isa(V) ? 'F' : + (isa(V) ? 'A' : 'o'))) << "]\n"); +} + +/// CreateSlot - Create a new slot for the specified value if it has no name. +void SlotTracker::CreateFunctionSlot(const Value *V) { + assert(!V->getType()->isVoidTy() && !V->hasName() && "Doesn't need a slot!"); + + unsigned DestSlot = fNext++; + fMap[V] = DestSlot; + + // G = Global, F = Function, o = other + ST_DEBUG(" Inserting value [" << V->getType() << "] = " << V << " slot=" << + DestSlot << " [o]\n"); +} + +/// CreateModuleSlot - Insert the specified MDNode* into the slot table. +void SlotTracker::CreateMetadataSlot(const MDNode *N) { + assert(N && "Can't insert a null Value into SlotTracker!"); + + // Don't insert if N is a function-local metadata, these are always printed + // inline. + if (!N->isFunctionLocal()) { + mdn_iterator I = mdnMap.find(N); + if (I != mdnMap.end()) + return; + + unsigned DestSlot = mdnNext++; + mdnMap[N] = DestSlot; + } + + // Recursively add any MDNodes referenced by operands. 
+  for (unsigned i = 0, e = N->getNumOperands(); i != e; ++i)
+    if (const MDNode *Op = dyn_cast_or_null<MDNode>(N->getOperand(i)))
+      CreateMetadataSlot(Op);
+}
+
+void SlotTracker::CreateAttributeSetSlot(AttributeSet AS) {
+  assert(AS.hasAttributes(AttributeSet::FunctionIndex) &&
+         "Doesn't need a slot!");
+
+  as_iterator I = asMap.find(AS);
+  if (I != asMap.end())
+    return;
+
+  unsigned DestSlot = asNext++;
+  asMap[AS] = DestSlot;
+}
+
+//===----------------------------------------------------------------------===//
+// AsmWriter Implementation
+//===----------------------------------------------------------------------===//
+
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+                                   TypePrinting *TypePrinter,
+                                   SlotTracker *Machine,
+                                   const Module *Context);
+
+
+
+static const char *getPredicateText(unsigned predicate) {
+  const char * pred = "unknown";
+  switch (predicate) {
+  case FCmpInst::FCMP_FALSE: pred = "false"; break;
+  case FCmpInst::FCMP_OEQ:   pred = "oeq"; break;
+  case FCmpInst::FCMP_OGT:   pred = "ogt"; break;
+  case FCmpInst::FCMP_OGE:   pred = "oge"; break;
+  case FCmpInst::FCMP_OLT:   pred = "olt"; break;
+  case FCmpInst::FCMP_OLE:   pred = "ole"; break;
+  case FCmpInst::FCMP_ONE:   pred = "one"; break;
+  case FCmpInst::FCMP_ORD:   pred = "ord"; break;
+  case FCmpInst::FCMP_UNO:   pred = "uno"; break;
+  case FCmpInst::FCMP_UEQ:   pred = "ueq"; break;
+  case FCmpInst::FCMP_UGT:   pred = "ugt"; break;
+  case FCmpInst::FCMP_UGE:   pred = "uge"; break;
+  case FCmpInst::FCMP_ULT:   pred = "ult"; break;
+  case FCmpInst::FCMP_ULE:   pred = "ule"; break;
+  case FCmpInst::FCMP_UNE:   pred = "une"; break;
+  case FCmpInst::FCMP_TRUE:  pred = "true"; break;
+  case ICmpInst::ICMP_EQ:    pred = "eq"; break;
+  case ICmpInst::ICMP_NE:    pred = "ne"; break;
+  case ICmpInst::ICMP_SGT:   pred = "sgt"; break;
+  case ICmpInst::ICMP_SGE:   pred = "sge"; break;
+  case ICmpInst::ICMP_SLT:   pred = "slt"; break;
+  case ICmpInst::ICMP_SLE:   pred = "sle"; break;
+  case ICmpInst::ICMP_UGT:   pred = "ugt"; break;
+  case ICmpInst::ICMP_UGE:   pred = "uge"; break;
+  case ICmpInst::ICMP_ULT:   pred = "ult"; break;
+  case ICmpInst::ICMP_ULE:   pred = "ule"; break;
+  }
+  return pred;
+}
+
+static void writeAtomicRMWOperation(raw_ostream &Out,
+                                    AtomicRMWInst::BinOp Op) {
+  switch (Op) {
+  default: Out << " <unknown operation> "; break;
+  case AtomicRMWInst::Xchg: Out << " xchg"; break;
+  case AtomicRMWInst::Add:  Out << " add"; break;
+  case AtomicRMWInst::Sub:  Out << " sub"; break;
+  case AtomicRMWInst::And:  Out << " and"; break;
+  case AtomicRMWInst::Nand: Out << " nand"; break;
+  case AtomicRMWInst::Or:   Out << " or"; break;
+  case AtomicRMWInst::Xor:  Out << " xor"; break;
+  case AtomicRMWInst::Max:  Out << " max"; break;
+  case AtomicRMWInst::Min:  Out << " min"; break;
+  case AtomicRMWInst::UMax: Out << " umax"; break;
+  case AtomicRMWInst::UMin: Out << " umin"; break;
+  }
+}
+
+static void WriteOptimizationInfo(raw_ostream &Out, const User *U) {
+  if (const FPMathOperator *FPO = dyn_cast<FPMathOperator>(U)) {
+    // Unsafe algebra implies all the others, no need to write them all out
+    if (FPO->hasUnsafeAlgebra())
+      Out << " fast";
+    else {
+      if (FPO->hasNoNaNs())
+        Out << " nnan";
+      if (FPO->hasNoInfs())
+        Out << " ninf";
+      if (FPO->hasNoSignedZeros())
+        Out << " nsz";
+      if (FPO->hasAllowReciprocal())
+        Out << " arcp";
+    }
+  }
+
+  if (const OverflowingBinaryOperator *OBO =
+        dyn_cast<OverflowingBinaryOperator>(U)) {
+    if (OBO->hasNoUnsignedWrap())
+      Out << " nuw";
+    if (OBO->hasNoSignedWrap())
+      Out << " nsw";
+  } else if (const PossiblyExactOperator *Div =
+               dyn_cast<PossiblyExactOperator>(U)) {
+    if (Div->isExact())
+      Out << " 
exact"; + } else if (const GEPOperator *GEP = dyn_cast(U)) { + if (GEP->isInBounds()) + Out << " inbounds"; + } +} + +static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, + TypePrinting &TypePrinter, + SlotTracker *Machine, + const Module *Context) { + if (const ConstantInt *CI = dyn_cast(CV)) { + if (CI->getType()->isIntegerTy(1)) { + Out << (CI->getZExtValue() ? "true" : "false"); + return; + } + Out << CI->getValue(); + return; + } + + if (const ConstantFP *CFP = dyn_cast(CV)) { + if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEsingle || + &CFP->getValueAPF().getSemantics() == &APFloat::IEEEdouble) { + // We would like to output the FP constant value in exponential notation, + // but we cannot do this if doing so will lose precision. Check here to + // make sure that we only output it in exponential format if we can parse + // the value back and get the same value. + // + bool ignored; + bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf; + bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; + bool isInf = CFP->getValueAPF().isInfinity(); + bool isNaN = CFP->getValueAPF().isNaN(); + if (!isHalf && !isInf && !isNaN) { + double Val = isDouble ? CFP->getValueAPF().convertToDouble() : + CFP->getValueAPF().convertToFloat(); + SmallString<128> StrVal; + raw_svector_ostream(StrVal) << Val; + + // Check to make sure that the stringized number is not some string like + // "Inf" or NaN, that atof will accept, but the lexer will not. Check + // that the string matches the "[-+]?[0-9]" regex. + // + if ((StrVal[0] >= '0' && StrVal[0] <= '9') || + ((StrVal[0] == '-' || StrVal[0] == '+') && + (StrVal[1] >= '0' && StrVal[1] <= '9'))) { + // Reparse stringized version! + if (APFloat(APFloat::IEEEdouble, StrVal).convertToDouble() == Val) { + Out << StrVal.str(); + return; + } + } + } + // Otherwise we could not reparse it to exactly the same value, so we must + // output the string in hexadecimal format! Note that loading and storing + // floating point types changes the bits of NaNs on some hosts, notably + // x86, so we must not use these types. + assert(sizeof(double) == sizeof(uint64_t) && + "assuming that double is 64 bits!"); + char Buffer[40]; + APFloat apf = CFP->getValueAPF(); + // Halves and floats are represented in ASCII IR as double, convert. + if (!isDouble) + apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, + &ignored); + Out << "0x" << + utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()), + Buffer+40); + return; + } + + // Either half, or some form of long double. + // These appear as a magic letter identifying the type, then a + // fixed number of hex digits. + Out << "0x"; + // Bit position, in the current word, of the next nibble to print. 
+    int shiftcount;
+
+    if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) {
+      Out << 'K';
+      // api needed to prevent premature destruction
+      APInt api = CFP->getValueAPF().bitcastToAPInt();
+      const uint64_t* p = api.getRawData();
+      uint64_t word = p[1];
+      shiftcount = 12;
+      int width = api.getBitWidth();
+      for (int j=0; j<width; j+=4, shiftcount-=4) {
+        int nibble = (word>>shiftcount) & 15;
+        if (nibble < 10)
+          Out << (unsigned char)(nibble + '0');
+        else
+          Out << (unsigned char)(nibble - 10 + 'A');
+        if (shiftcount == 0 && j+4 < width) {
+          word = *p;
+          shiftcount = 64;
+          if (width-j-4 < 64)
+            shiftcount = width-j-4;
+        }
+      }
+      return;
+    } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) {
+      shiftcount = 60;
+      Out << 'L';
+    } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) {
+      shiftcount = 60;
+      Out << 'M';
+    } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) {
+      shiftcount = 12;
+      Out << 'H';
+    } else
+      llvm_unreachable("Unsupported floating point type");
+    // api needed to prevent premature destruction
+    APInt api = CFP->getValueAPF().bitcastToAPInt();
+    const uint64_t* p = api.getRawData();
+    uint64_t word = *p;
+    int width = api.getBitWidth();
+    for (int j=0; j<width; j+=4, shiftcount-=4) {
+      int nibble = (word>>shiftcount) & 15;
+      if (nibble < 10)
+        Out << (unsigned char)(nibble + '0');
+      else
+        Out << (unsigned char)(nibble - 10 + 'A');
+      if (shiftcount == 0 && j+4 < width) {
+        word = *(++p);
+        shiftcount = 64;
+        if (width-j-4 < 64)
+          shiftcount = width-j-4;
+      }
+    }
+    return;
+  }
+
+  if (isa<ConstantAggregateZero>(CV)) {
+    Out << "zeroinitializer";
+    return;
+  }
+
+  if (const BlockAddress *BA = dyn_cast<BlockAddress>(CV)) {
+    Out << "blockaddress(";
+    WriteAsOperandInternal(Out, BA->getFunction(), &TypePrinter, Machine,
+                           Context);
+    Out << ", ";
+    WriteAsOperandInternal(Out, BA->getBasicBlock(), &TypePrinter, Machine,
+                           Context);
+    Out << ")";
+    return;
+  }
+
+  if (const ConstantArray *CA = dyn_cast<ConstantArray>(CV)) {
+    Type *ETy = CA->getType()->getElementType();
+    Out << '[';
+    TypePrinter.print(ETy, Out);
+    Out << ' ';
+    WriteAsOperandInternal(Out, CA->getOperand(0),
+                           &TypePrinter, Machine,
+                           Context);
+    for (unsigned i = 1, e = CA->getNumOperands(); i != e; ++i) {
+      Out << ", ";
+      TypePrinter.print(ETy, Out);
+      Out << ' ';
+      WriteAsOperandInternal(Out, CA->getOperand(i), &TypePrinter, Machine,
+                             Context);
+    }
+    Out << ']';
+    return;
+  }
+
+  if (const ConstantDataArray *CA = dyn_cast<ConstantDataArray>(CV)) {
+    // As a special case, print the array as a string if it is an array of
+    // i8 with ConstantInt values.
+    if (CA->isString()) {
+      Out << "c\"";
+      PrintEscapedString(CA->getAsString(), Out);
+      Out << '"';
+      return;
+    }
+
+    Type *ETy = CA->getType()->getElementType();
+    Out << '[';
+    TypePrinter.print(ETy, Out);
+    Out << ' ';
+    WriteAsOperandInternal(Out, CA->getElementAsConstant(0),
+                           &TypePrinter, Machine,
+                           Context);
+    for (unsigned i = 1, e = CA->getNumElements(); i != e; ++i) {
+      Out << ", ";
+      TypePrinter.print(ETy, Out);
+      Out << ' ';
+      WriteAsOperandInternal(Out, CA->getElementAsConstant(i), &TypePrinter,
+                             Machine, Context);
+    }
+    Out << ']';
+    return;
+  }
+
+
+  if (const ConstantStruct *CS = dyn_cast<ConstantStruct>(CV)) {
+    if (CS->getType()->isPacked())
+      Out << '<';
+    Out << '{';
+    unsigned N = CS->getNumOperands();
+    if (N) {
+      Out << ' ';
+      TypePrinter.print(CS->getOperand(0)->getType(), Out);
+      Out << ' ';
+
+      WriteAsOperandInternal(Out, CS->getOperand(0), &TypePrinter, Machine,
+                             Context);
+
+      for (unsigned i = 1; i < N; i++) {
+        Out << ", ";
+        TypePrinter.print(CS->getOperand(i)->getType(), Out);
+        Out << ' ';
+
+        WriteAsOperandInternal(Out, CS->getOperand(i), &TypePrinter, Machine,
+                               Context);
+      }
+      Out << ' ';
+    }
+
+    Out << '}';
+    if (CS->getType()->isPacked())
+      Out << '>';
+    return;
+  }
+
+  if (isa<ConstantVector>(CV) || isa<ConstantDataVector>(CV)) {
+    Type *ETy = CV->getType()->getVectorElementType();
+    Out << '<';
+    TypePrinter.print(ETy, Out);
+    Out << ' ';
+    WriteAsOperandInternal(Out, CV->getAggregateElement(0U), &TypePrinter,
+                           Machine, Context);
+    for (unsigned i = 1, e = CV->getType()->getVectorNumElements(); i != e;++i){
+      Out << ", ";
+      TypePrinter.print(ETy, Out);
+      Out << ' ';
+      WriteAsOperandInternal(Out, CV->getAggregateElement(i), &TypePrinter,
+                             Machine, Context);
+    }
+    Out << '>';
+    return;
+  }
+
+  if (isa<ConstantPointerNull>(CV)) {
+    Out << "null";
+    return;
+  }
+
+  if (isa<UndefValue>(CV)) {
+    Out << "undef";
+    return;
+  }
+
+  if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CV)) {
+    Out << CE->getOpcodeName();
+    WriteOptimizationInfo(Out, CE);
+    if (CE->isCompare())
+      Out << ' ' << getPredicateText(CE->getPredicate());
+    Out << " (";
+
+    for (User::const_op_iterator OI=CE->op_begin(); OI != CE->op_end(); ++OI) {
+      TypePrinter.print((*OI)->getType(), Out);
+      Out << ' ';
+      WriteAsOperandInternal(Out, *OI, &TypePrinter, Machine, Context);
+      if (OI+1 != CE->op_end())
+        Out << ", ";
+    }
+
+    if (CE->hasIndices()) {
+      ArrayRef<unsigned> Indices = CE->getIndices();
+      for (unsigned i = 0, e = Indices.size(); i != e; ++i)
+        Out << ", " << Indices[i];
+    }
+
+    if (CE->isCast()) {
+      Out << " to ";
+      TypePrinter.print(CE->getType(), Out);
+    }
+
+    Out << ')';
+    return;
+  }
+
+  Out << "<placeholder or bad value>";
+}
+
+static void WriteMDNodeBodyInternal(raw_ostream &Out, const MDNode *Node,
+                                    TypePrinting *TypePrinter,
+                                    SlotTracker *Machine,
+                                    const Module *Context) {
+  Out << "!{";
+  for (unsigned mi = 0, me = Node->getNumOperands(); mi != me; ++mi) {
+    const Value *V = Node->getOperand(mi);
+    if (V == 0)
+      Out << "null";
+    else {
+      TypePrinter->print(V->getType(), Out);
+      Out << ' ';
+      WriteAsOperandInternal(Out, Node->getOperand(mi),
+                             TypePrinter, Machine, Context);
+    }
+    if (mi + 1 != me)
+      Out << ", ";
+  }
+
+  Out << "}";
+}
+
+
+/// WriteAsOperand - Write the name of the specified value out to the specified
+/// ostream.  This can be useful when you just want to print int %reg126, not
+/// the whole instruction that generated it.
+///
+static void WriteAsOperandInternal(raw_ostream &Out, const Value *V,
+                                   TypePrinting *TypePrinter,
+                                   SlotTracker *Machine,
+                                   const Module *Context) {
+  if (V->hasName()) {
+    PrintLLVMName(Out, V);
+    return;
+  }
+
+  const Constant *CV = dyn_cast<Constant>(V);
+  if (CV && !isa<GlobalValue>(CV)) {
+    assert(TypePrinter && "Constants require TypePrinting!");
+    WriteConstantInternal(Out, CV, *TypePrinter, Machine, Context);
+    return;
+  }
+
+  if (const InlineAsm *IA = dyn_cast<InlineAsm>(V)) {
+    Out << "asm ";
+    if (IA->hasSideEffects())
+      Out << "sideeffect ";
+    if (IA->isAlignStack())
+      Out << "alignstack ";
+    // We don't emit the AD_ATT dialect as it's the assumed default.
+    if (IA->getDialect() == InlineAsm::AD_Intel)
+      Out << "inteldialect ";
+    Out << '"';
+    PrintEscapedString(IA->getAsmString(), Out);
+    Out << "\", \"";
+    PrintEscapedString(IA->getConstraintString(), Out);
+    Out << '"';
+    return;
+  }
+
+  if (const MDNode *N = dyn_cast<MDNode>(V)) {
+    if (N->isFunctionLocal()) {
+      // Print metadata inline, not via slot reference number.
+      WriteMDNodeBodyInternal(Out, N, TypePrinter, Machine, Context);
+      return;
+    }
+
+    if (!Machine) {
+      if (N->isFunctionLocal())
+        Machine = new SlotTracker(N->getFunction());
+      else
+        Machine = new SlotTracker(Context);
+    }
+    int Slot = Machine->getMetadataSlot(N);
+    if (Slot == -1)
+      Out << "<badref>";
+    else
+      Out << '!' << Slot;
+    return;
+  }
+
+  if (const MDString *MDS = dyn_cast<MDString>(V)) {
+    Out << "!\"";
+    PrintEscapedString(MDS->getString(), Out);
+    Out << '"';
+    return;
+  }
+
+  if (V->getValueID() == Value::PseudoSourceValueVal ||
+      V->getValueID() == Value::FixedStackPseudoSourceValueVal) {
+    V->print(Out);
+    return;
+  }
+
+  char Prefix = '%';
+  int Slot;
+  // If we have a SlotTracker, use it.
+  if (Machine) {
+    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+      Slot = Machine->getGlobalSlot(GV);
+      Prefix = '@';
+    } else {
+      Slot = Machine->getLocalSlot(V);
+
+      // If the local value didn't succeed, then we may be referring to a value
+      // from a different function.  Translate it, as this can happen when using
+      // address of blocks.
+      if (Slot == -1)
+        if ((Machine = createSlotTracker(V))) {
+          Slot = Machine->getLocalSlot(V);
+          delete Machine;
+        }
+    }
+  } else if ((Machine = createSlotTracker(V))) {
+    // Otherwise, create one to get the # and then destroy it.
+    if (const GlobalValue *GV = dyn_cast<GlobalValue>(V)) {
+      Slot = Machine->getGlobalSlot(GV);
+      Prefix = '@';
+    } else {
+      Slot = Machine->getLocalSlot(V);
+    }
+    delete Machine;
+    Machine = 0;
+  } else {
+    Slot = -1;
+  }
+
+  if (Slot != -1)
+    Out << Prefix << Slot;
+  else
+    Out << "<badref>";
+}
+
+void llvm::WriteAsOperand(raw_ostream &Out, const Value *V,
+                          bool PrintType, const Module *Context) {
+
+  // Fast path: Don't construct and populate a TypePrinting object if we
+  // won't be needing any types printed.
+  if (!PrintType &&
+      ((!isa<Constant>(V) && !isa<MDNode>(V)) ||
+       V->hasName() || isa<GlobalValue>(V))) {
+    WriteAsOperandInternal(Out, V, 0, 0, Context);
+    return;
+  }
+
+  if (Context == 0) Context = getModuleFromVal(V);
+
+  TypePrinting TypePrinter;
+  if (Context)
+    TypePrinter.incorporateTypes(*Context);
+  if (PrintType) {
+    TypePrinter.print(V->getType(), Out);
+    Out << ' ';
+  }
+
+  WriteAsOperandInternal(Out, V, &TypePrinter, 0, Context);
+}
+
+namespace {
+
+class AssemblyWriter {
+  formatted_raw_ostream &Out;
+  SlotTracker &Machine;
+  const Module *TheModule;
+  TypePrinting TypePrinter;
+  AssemblyAnnotationWriter *AnnotationWriter;
+
+public:
+  inline AssemblyWriter(formatted_raw_ostream &o, SlotTracker &Mac,
+                        const Module *M,
+                        AssemblyAnnotationWriter *AAW)
+    : Out(o), Machine(Mac), TheModule(M), AnnotationWriter(AAW) {
+    if (M)
+      TypePrinter.incorporateTypes(*M);
+  }
+
+  void printMDNodeBody(const MDNode *MD);
+  void printNamedMDNode(const NamedMDNode *NMD);
+
+  void printModule(const Module *M);
+
+  void writeOperand(const Value *Op, bool PrintType);
+  void writeParamOperand(const Value *Operand, AttributeSet Attrs,unsigned Idx);
+  void writeAtomic(AtomicOrdering Ordering, SynchronizationScope SynchScope);
+
+  void writeAllMDNodes();
+  void writeAllAttributeGroups();
+
+  void printTypeIdentities();
+  void printGlobal(const GlobalVariable *GV);
+  void printAlias(const GlobalAlias *GV);
+  void printFunction(const Function *F);
+  void printArgument(const Argument *FA, AttributeSet Attrs, unsigned Idx);
+  void printBasicBlock(const BasicBlock *BB);
+  void printInstruction(const Instruction &I);
+
+private:
+  // printInfoComment - Print a little comment after the instruction indicating
+  // which slot it occupies.
+  void printInfoComment(const Value &V);
+};
+}  // end of anonymous namespace
+
+void AssemblyWriter::writeOperand(const Value *Operand, bool PrintType) {
+  if (Operand == 0) {
+    Out << "<null operand!>";
+    return;
+  }
+  if (PrintType) {
+    TypePrinter.print(Operand->getType(), Out);
+    Out << ' ';
+  }
+  WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::writeAtomic(AtomicOrdering Ordering,
+                                 SynchronizationScope SynchScope) {
+  if (Ordering == NotAtomic)
+    return;
+
+  switch (SynchScope) {
+  case SingleThread: Out << " singlethread"; break;
+  case CrossThread: break;
+  }
+
+  switch (Ordering) {
+  default: Out << " <bad ordering>"; break;
+  case Unordered: Out << " unordered"; break;
+  case Monotonic: Out << " monotonic"; break;
+  case Acquire: Out << " acquire"; break;
+  case Release: Out << " release"; break;
+  case AcquireRelease: Out << " acq_rel"; break;
+  case SequentiallyConsistent: Out << " seq_cst"; break;
+  }
+}
+
+void AssemblyWriter::writeParamOperand(const Value *Operand,
+                                       AttributeSet Attrs, unsigned Idx) {
+  if (Operand == 0) {
+    Out << "<null operand!>";
+    return;
+  }
+
+  // Print the type
+  TypePrinter.print(Operand->getType(), Out);
+  // Print parameter attributes list
+  if (Attrs.hasAttributes(Idx))
+    Out << ' ' << Attrs.getAsString(Idx);
+  Out << ' ';
+  // Print the operand
+  WriteAsOperandInternal(Out, Operand, &TypePrinter, &Machine, TheModule);
+}
+
+void AssemblyWriter::printModule(const Module *M) {
+  Machine.initialize();
+
+  if (!M->getModuleIdentifier().empty() &&
+      // Don't print the ID if it will start a new line (which would
+      // require a comment char before it).
+ M->getModuleIdentifier().find('\n') == std::string::npos) + Out << "; ModuleID = '" << M->getModuleIdentifier() << "'\n"; + + if (!M->getDataLayout().empty()) + Out << "target datalayout = \"" << M->getDataLayout() << "\"\n"; + if (!M->getTargetTriple().empty()) + Out << "target triple = \"" << M->getTargetTriple() << "\"\n"; + + if (!M->getModuleInlineAsm().empty()) { + // Split the string into lines, to make it easier to read the .ll file. + std::string Asm = M->getModuleInlineAsm(); + size_t CurPos = 0; + size_t NewLine = Asm.find_first_of('\n', CurPos); + Out << '\n'; + while (NewLine != std::string::npos) { + // We found a newline, print the portion of the asm string from the + // last newline up to this newline. + Out << "module asm \""; + PrintEscapedString(std::string(Asm.begin()+CurPos, Asm.begin()+NewLine), + Out); + Out << "\"\n"; + CurPos = NewLine+1; + NewLine = Asm.find_first_of('\n', CurPos); + } + std::string rest(Asm.begin()+CurPos, Asm.end()); + if (!rest.empty()) { + Out << "module asm \""; + PrintEscapedString(rest, Out); + Out << "\"\n"; + } + } + + printTypeIdentities(); + + // Output all globals. + if (!M->global_empty()) Out << '\n'; + for (Module::const_global_iterator I = M->global_begin(), E = M->global_end(); + I != E; ++I) { + printGlobal(I); Out << '\n'; + } + + // Output all aliases. + if (!M->alias_empty()) Out << "\n"; + for (Module::const_alias_iterator I = M->alias_begin(), E = M->alias_end(); + I != E; ++I) + printAlias(I); + + // Output all of the functions. + for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) + printFunction(I); + + // Output all attribute groups. + if (!Machine.as_empty()) { + Out << '\n'; + writeAllAttributeGroups(); + } + + // Output named metadata. + if (!M->named_metadata_empty()) Out << '\n'; + + for (Module::const_named_metadata_iterator I = M->named_metadata_begin(), + E = M->named_metadata_end(); I != E; ++I) + printNamedMDNode(I); + + // Output metadata. + if (!Machine.mdn_empty()) { + Out << '\n'; + writeAllMDNodes(); + } +} + +void AssemblyWriter::printNamedMDNode(const NamedMDNode *NMD) { + Out << '!'; + StringRef Name = NMD->getName(); + if (Name.empty()) { + Out << " "; + } else { + if (isalpha(static_cast(Name[0])) || + Name[0] == '-' || Name[0] == '$' || + Name[0] == '.' || Name[0] == '_') + Out << Name[0]; + else + Out << '\\' << hexdigit(Name[0] >> 4) << hexdigit(Name[0] & 0x0F); + for (unsigned i = 1, e = Name.size(); i != e; ++i) { + unsigned char C = Name[i]; + if (isalnum(static_cast(C)) || C == '-' || C == '$' || + C == '.' || C == '_') + Out << C; + else + Out << '\\' << hexdigit(C >> 4) << hexdigit(C & 0x0F); + } + } + Out << " = !{"; + for (unsigned i = 0, e = NMD->getNumOperands(); i != e; ++i) { + if (i) Out << ", "; + int Slot = Machine.getMetadataSlot(NMD->getOperand(i)); + if (Slot == -1) + Out << ""; + else + Out << '!' 
<< Slot; + } + Out << "}\n"; +} + + +static void PrintLinkage(GlobalValue::LinkageTypes LT, + formatted_raw_ostream &Out) { + switch (LT) { + case GlobalValue::ExternalLinkage: break; + case GlobalValue::PrivateLinkage: Out << "private "; break; + case GlobalValue::LinkerPrivateLinkage: Out << "linker_private "; break; + case GlobalValue::LinkerPrivateWeakLinkage: + Out << "linker_private_weak "; + break; + case GlobalValue::InternalLinkage: Out << "internal "; break; + case GlobalValue::LinkOnceAnyLinkage: Out << "linkonce "; break; + case GlobalValue::LinkOnceODRLinkage: Out << "linkonce_odr "; break; + case GlobalValue::LinkOnceODRAutoHideLinkage: + Out << "linkonce_odr_auto_hide "; + break; + case GlobalValue::WeakAnyLinkage: Out << "weak "; break; + case GlobalValue::WeakODRLinkage: Out << "weak_odr "; break; + case GlobalValue::CommonLinkage: Out << "common "; break; + case GlobalValue::AppendingLinkage: Out << "appending "; break; + case GlobalValue::DLLImportLinkage: Out << "dllimport "; break; + case GlobalValue::DLLExportLinkage: Out << "dllexport "; break; + case GlobalValue::ExternalWeakLinkage: Out << "extern_weak "; break; + case GlobalValue::AvailableExternallyLinkage: + Out << "available_externally "; + break; + } +} + + +static void PrintVisibility(GlobalValue::VisibilityTypes Vis, + formatted_raw_ostream &Out) { + switch (Vis) { + case GlobalValue::DefaultVisibility: break; + case GlobalValue::HiddenVisibility: Out << "hidden "; break; + case GlobalValue::ProtectedVisibility: Out << "protected "; break; + } +} + +static void PrintThreadLocalModel(GlobalVariable::ThreadLocalMode TLM, + formatted_raw_ostream &Out) { + switch (TLM) { + case GlobalVariable::NotThreadLocal: + break; + case GlobalVariable::GeneralDynamicTLSModel: + Out << "thread_local "; + break; + case GlobalVariable::LocalDynamicTLSModel: + Out << "thread_local(localdynamic) "; + break; + case GlobalVariable::InitialExecTLSModel: + Out << "thread_local(initialexec) "; + break; + case GlobalVariable::LocalExecTLSModel: + Out << "thread_local(localexec) "; + break; + } +} + +void AssemblyWriter::printGlobal(const GlobalVariable *GV) { + if (GV->isMaterializable()) + Out << "; Materializable\n"; + + WriteAsOperandInternal(Out, GV, &TypePrinter, &Machine, GV->getParent()); + Out << " = "; + + if (!GV->hasInitializer() && GV->hasExternalLinkage()) + Out << "external "; + + PrintLinkage(GV->getLinkage(), Out); + PrintVisibility(GV->getVisibility(), Out); + PrintThreadLocalModel(GV->getThreadLocalMode(), Out); + + if (unsigned AddressSpace = GV->getType()->getAddressSpace()) + Out << "addrspace(" << AddressSpace << ") "; + if (GV->hasUnnamedAddr()) Out << "unnamed_addr "; + if (GV->isExternallyInitialized()) Out << "externally_initialized "; + Out << (GV->isConstant() ? 
"constant " : "global "); + TypePrinter.print(GV->getType()->getElementType(), Out); + + if (GV->hasInitializer()) { + Out << ' '; + writeOperand(GV->getInitializer(), false); + } + + if (GV->hasSection()) { + Out << ", section \""; + PrintEscapedString(GV->getSection(), Out); + Out << '"'; + } + if (GV->getAlignment()) + Out << ", align " << GV->getAlignment(); + + printInfoComment(*GV); +} + +void AssemblyWriter::printAlias(const GlobalAlias *GA) { + if (GA->isMaterializable()) + Out << "; Materializable\n"; + + // Don't crash when dumping partially built GA + if (!GA->hasName()) + Out << "<> = "; + else { + PrintLLVMName(Out, GA); + Out << " = "; + } + PrintVisibility(GA->getVisibility(), Out); + + Out << "alias "; + + PrintLinkage(GA->getLinkage(), Out); + + const Constant *Aliasee = GA->getAliasee(); + + if (Aliasee == 0) { + TypePrinter.print(GA->getType(), Out); + Out << " <>"; + } else { + writeOperand(Aliasee, !isa(Aliasee)); + } + + printInfoComment(*GA); + Out << '\n'; +} + +void AssemblyWriter::printTypeIdentities() { + if (TypePrinter.NumberedTypes.empty() && + TypePrinter.NamedTypes.empty()) + return; + + Out << '\n'; + + // We know all the numbers that each type is used and we know that it is a + // dense assignment. Convert the map to an index table. + std::vector NumberedTypes(TypePrinter.NumberedTypes.size()); + for (DenseMap::iterator I = + TypePrinter.NumberedTypes.begin(), E = TypePrinter.NumberedTypes.end(); + I != E; ++I) { + assert(I->second < NumberedTypes.size() && "Didn't get a dense numbering?"); + NumberedTypes[I->second] = I->first; + } + + // Emit all numbered types. + for (unsigned i = 0, e = NumberedTypes.size(); i != e; ++i) { + Out << '%' << i << " = type "; + + // Make sure we print out at least one level of the type structure, so + // that we do not get %2 = type %2 + TypePrinter.printStructBody(NumberedTypes[i], Out); + Out << '\n'; + } + + for (unsigned i = 0, e = TypePrinter.NamedTypes.size(); i != e; ++i) { + PrintLLVMName(Out, TypePrinter.NamedTypes[i]->getName(), LocalPrefix); + Out << " = type "; + + // Make sure we print out at least one level of the type structure, so + // that we do not get %FILE = type %FILE + TypePrinter.printStructBody(TypePrinter.NamedTypes[i], Out); + Out << '\n'; + } +} + +/// printFunction - Print all aspects of a function. +/// +void AssemblyWriter::printFunction(const Function *F) { + // Print out the return type and name. + Out << '\n'; + + if (AnnotationWriter) AnnotationWriter->emitFunctionAnnot(F, Out); + + if (F->isMaterializable()) + Out << "; Materializable\n"; + + if (F->isDeclaration()) + Out << "declare "; + else + Out << "define "; + + PrintLinkage(F->getLinkage(), Out); + PrintVisibility(F->getVisibility(), Out); + + // Print the calling convention. + if (F->getCallingConv() != CallingConv::C) { + PrintCallingConv(F->getCallingConv(), Out); + Out << " "; + } + + FunctionType *FT = F->getFunctionType(); + const AttributeSet &Attrs = F->getAttributes(); + if (Attrs.hasAttributes(AttributeSet::ReturnIndex)) + Out << Attrs.getAsString(AttributeSet::ReturnIndex) << ' '; + TypePrinter.print(F->getReturnType(), Out); + Out << ' '; + WriteAsOperandInternal(Out, F, &TypePrinter, &Machine, F->getParent()); + Out << '('; + Machine.incorporateFunction(F); + + // Loop over the arguments, printing them... + + unsigned Idx = 1; + if (!F->isDeclaration()) { + // If this isn't a declaration, print the argument names as well. 
+ for (Function::const_arg_iterator I = F->arg_begin(), E = F->arg_end(); + I != E; ++I) { + // Insert commas as we go... the first arg doesn't get a comma + if (I != F->arg_begin()) Out << ", "; + printArgument(I, Attrs, Idx); + Idx++; + } + } else { + // Otherwise, print the types from the function type. + for (unsigned i = 0, e = FT->getNumParams(); i != e; ++i) { + // Insert commas as we go... the first arg doesn't get a comma + if (i) Out << ", "; + + // Output type... + TypePrinter.print(FT->getParamType(i), Out); + + if (Attrs.hasAttributes(i+1)) + Out << ' ' << Attrs.getAsString(i+1); + } + } + + // Finish printing arguments... + if (FT->isVarArg()) { + if (FT->getNumParams()) Out << ", "; + Out << "..."; // Output varargs portion of signature! + } + Out << ')'; + if (F->hasUnnamedAddr()) + Out << " unnamed_addr"; + if (Attrs.hasAttributes(AttributeSet::FunctionIndex)) + Out << " #" << Machine.getAttributeGroupSlot(Attrs.getFnAttributes()); + if (F->hasSection()) { + Out << " section \""; + PrintEscapedString(F->getSection(), Out); + Out << '"'; + } + if (F->getAlignment()) + Out << " align " << F->getAlignment(); + if (F->hasGC()) + Out << " gc \"" << F->getGC() << '"'; + if (F->isDeclaration()) { + Out << '\n'; + } else { + Out << " {"; + // Output all of the function's basic blocks. + for (Function::const_iterator I = F->begin(), E = F->end(); I != E; ++I) + printBasicBlock(I); + + Out << "}\n"; + } + + Machine.purgeFunction(); +} + +/// printArgument - This member is called for every argument that is passed into +/// the function. Simply print it out +/// +void AssemblyWriter::printArgument(const Argument *Arg, + AttributeSet Attrs, unsigned Idx) { + // Output type... + TypePrinter.print(Arg->getType(), Out); + + // Output parameter attributes list + if (Attrs.hasAttributes(Idx)) + Out << ' ' << Attrs.getAsString(Idx); + + // Output name, if available... + if (Arg->hasName()) { + Out << ' '; + PrintLLVMName(Out, Arg); + } +} + +/// printBasicBlock - This member is called for each basic block in a method. +/// +void AssemblyWriter::printBasicBlock(const BasicBlock *BB) { + if (BB->hasName()) { // Print out the label if it exists... + Out << "\n"; + PrintLLVMName(Out, BB->getName(), LabelPrefix); + Out << ':'; + } else if (!BB->use_empty()) { // Don't print block # of no uses... + Out << "\n;